wip : try to compress just mlp

wip : experimenting
2022-10-08 15:12:15 +03:00 · 2022-10-08 14:08:43 +03:00
109 changed files with 1628 additions and 14685 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -113,73 +113,3 @@ jobs:
              run: |
                make
                ctest -L gh --output-on-failure
-
-    windows:
-        runs-on: windows-latest
-
-        strategy:
-            matrix:
-                build: [RelWithDebInfo]
-                arch: [Win32, x64]
-                blas: [ON]
-                sdl2: [ON]
-                include:
-                  - arch: Win32
-                    obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
-                    s2arc: x86
-                  - arch: x64
-                    obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
-                    s2arc: x64
-                  - sdl2: ON
-                    s2ver: 2.26.0
-
-        steps:
-            - name: Clone
-              uses: actions/checkout@v1
-
-            - name: Add msbuild to PATH
-              uses: microsoft/setup-msbuild@v1
-
-            - name: Fetch OpenBLAS
-              if: matrix.blas == 'ON'
-              run: |
-                C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
-                7z x blas.zip -oblas -y
-                copy blas/include/cblas.h .
-                copy blas/include/openblas_config.h .
-                echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
-
-            - name: Fetch SDL2 and set SDL2_DIR
-              if: matrix.sdl2 == 'ON'
-              run: |
-                C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
-                7z x sdl2.zip
-                echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
-            - name: Configure
-              run: >
-                cmake -S . -B ./build -A ${{ matrix.arch }}
-                -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-                -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
-                -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
-                -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
-
-            - name: Build
-              run: |
-                cd ./build
-                msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-
-            - name: Copy libopenblas.dll
-              if: matrix.blas == 'ON'
-              run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
-
-            - name: Copy SDL2.dll
-              if: matrix.sdl2 == 'ON'
-              run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
-
-            - name: Upload binaries
-              if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
-              uses: actions/upload-artifact@v1
-              with:
-                name: whisper-bin-${{ matrix.arch }}
-                path: build/bin/${{ matrix.build }}
--- a/.gitignore
+++ b/.gitignore
@ -1,29 +1,7 @@
-*.o
-.cache/
-.vs/
-.vscode/
-.DS_Store
-
-build/
-build-em/
-build-debug/
-build-release/
-build-sanitize-addr/
-build-sanitize-thread/
-
-/main
-/stream
-/command
-/talk
-/bench
-
 sync.sh
-libwhisper.so
+main
+stream
+*.o
+.cache
+build/
 compile_commands.json
-
-examples/arm_neon.h
-examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
-examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
-examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
-
-extra/bench-gg.txt
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +0,0 @@
-[submodule "bindings/ios"]
-	path = bindings/ios
-	url = https://github.com/ggerganov/whisper.spm
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,5 +1,5 @@
 cmake_minimum_required (VERSION 3.0)
-project(whisper.cpp VERSION 1.0.3)
+project(whisper.cpp VERSION 1.0.0)

 set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
@ -7,73 +7,38 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(WHISPER_STANDALONE ON)
-    include(cmake/GitVars.cmake)
-    include(cmake/BuildTypes.cmake)
-
-    # configure project version
-    if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
-        configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
-    endif()
-    configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
 else()
    set(WHISPER_STANDALONE OFF)
 endif()

-if (EMSCRIPTEN)
-    set(BUILD_SHARED_LIBS_DEFAULT OFF)
-
-    option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
-else()
-    if (MINGW)
-        set(BUILD_SHARED_LIBS_DEFAULT OFF)
-    else()
-        set(BUILD_SHARED_LIBS_DEFAULT ON)
-    endif()
-endif()
-
 # options

-option(BUILD_SHARED_LIBS               "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
-
-option(WHISPER_ALL_WARNINGS            "whisper: enable all compiler warnings"                   ON)
+option(WHISPER_ALL_WARNINGS            "whisper: enable all compiler warnings" ON)
 option(WHISPER_ALL_WARNINGS_3RD_PARTY  "whisper: enable all compiler warnings in 3rd party libs" OFF)

-option(WHISPER_SANITIZE_THREAD         "whisper: enable thread sanitizer"    OFF)
-option(WHISPER_SANITIZE_ADDRESS        "whisper: enable address sanitizer"   OFF)
+option(WHISPER_SANITIZE_THREAD         "whisper: enable thread sanitizer" OFF)
+option(WHISPER_SANITIZE_ADDRESS        "whisper: enable address sanitizer" OFF)
 option(WHISPER_SANITIZE_UNDEFINED      "whisper: enable undefined sanitizer" OFF)

-option(WHISPER_BUILD_TESTS             "whisper: build tests"    ${WHISPER_STANDALONE})
-option(WHISPER_BUILD_EXAMPLES          "whisper: build examples" ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_TESTS             "whisper: build tests" ${WHISPER_STANDALONE})

 option(WHISPER_SUPPORT_SDL2            "whisper: support for libSDL2" OFF)

-if (APPLE)
-    option(WHISPER_NO_ACCELERATE       "whisper: disable Accelerate framework" OFF)
-    option(WHISPER_NO_AVX              "whisper: disable AVX" OFF)
-    option(WHISPER_NO_AVX2             "whisper: disable AVX2" OFF)
-else()
-    option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
-endif()
-
-option(WHISPER_PERF                    "whisper: enable perf timings" OFF)
-
 # sanitizers

-if (NOT MSVC)
-    if (WHISPER_SANITIZE_THREAD)
-        set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
-    endif()
+if (WHISPER_SANITIZE_THREAD)
+    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} -fsanitize=thread")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
+endif()

-    if (WHISPER_SANITIZE_ADDRESS)
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=address -fno-omit-frame-pointer")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
-    endif()
+if (WHISPER_SANITIZE_ADDRESS)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=address -fno-omit-frame-pointer")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+endif()

-    if (WHISPER_SANITIZE_UNDEFINED)
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=undefined")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
-    endif()
+if (WHISPER_SANITIZE_UNDEFINED)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=undefined")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
 endif()

 #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
@ -86,31 +51,14 @@ set(CMAKE_CXX_STANDARD 11)

 find_package(Threads REQUIRED)

-# on APPLE - include Accelerate framework
-if (APPLE AND NOT WHISPER_NO_ACCELERATE)
-    find_library(ACCELERATE_FRAMEWORK Accelerate)
-    if (ACCELERATE_FRAMEWORK)
-        message(STATUS "Accelerate framework found")
+if (WHISPER_SUPPORT_SDL2)
+    # SDL2
+    find_package(SDL2 REQUIRED)

-        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${ACCELERATE_FRAMEWORK})
-        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
-    else()
-        message(WARNING "Accelerate framework not found")
-    endif()
-endif()
+    string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)

-if (WHISPER_SUPPORT_OPENBLAS)
-    find_library(OPENBLAS_LIB
-        NAMES openblas libopenblas
-        )
-    if (OPENBLAS_LIB)
-        message(STATUS "OpenBLAS found")
-
-        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${OPENBLAS_LIB})
-        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
-    else()
-        message(WARNING "OpenBLAS not found")
-    endif()
+    message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
+    message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
 endif()

 # compiler flags
@ -121,7 +69,7 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 endif ()

 if (WHISPER_ALL_WARNINGS)
-    if (NOT MSVC)
+    if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
            -Wall                           \
            -Wextra                         \
@ -132,14 +80,12 @@ if (WHISPER_ALL_WARNINGS)
            -Wpointer-arith                 \
        ")
    else()
-        # todo : msvc
+        # todo : windows
    endif()
 endif()

-if (NOT MSVC)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
-    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
-endif()
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
+#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")

 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

@ -147,32 +93,10 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
    message(STATUS "ARM detected")
 else()
    message(STATUS "x86 detected")
-    if (MSVC)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
-        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
-    else()
-        if (EMSCRIPTEN)
-            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
-        else()
-            if(NOT WHISPER_NO_AVX)
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
-            endif()
-            if(NOT WHISPER_NO_AVX2)
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
-            endif()
-            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma -mf16c")
-        endif()
-    endif()
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
 endif()

-if (WHISPER_PERF)
-    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
-endif()
-
-#
 # whisper - this is the main library of the project
-#

 set(TARGET whisper)

@ -185,13 +109,7 @@ target_include_directories(${TARGET} PUBLIC
    .
    )

-if (MSVC)
-    target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
-
-    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
-else()
-    target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
-endif()
+target_link_libraries(${TARGET} PRIVATE ${CMAKE_THREAD_LIBS_INIT})

 if (BUILD_SHARED_LIBS)
    target_link_libraries(${TARGET} PUBLIC
@ -203,10 +121,6 @@ if (BUILD_SHARED_LIBS)
        )
 endif()

-if (EMSCRIPTEN)
-    set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
-endif()
-
 target_compile_definitions(${TARGET} PUBLIC
    ${WHISPER_EXTRA_FLAGS}
    )
@ -216,21 +130,24 @@ install(TARGETS ${TARGET}
    ARCHIVE DESTINATION lib/static
    )

-#
-# bindings
-#
-
-add_subdirectory(bindings)
-
-#
 # programs, examples and tests
-#

-if (WHISPER_BUILD_TESTS)
-    enable_testing()
-    add_subdirectory(tests)
+if (WHISPER_STANDALONE)
+    # main
+    set(TARGET main)
+    add_executable(${TARGET} main.cpp)
+    target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
+
+    if (WHISPER_SUPPORT_SDL2)
+        # stream
+        set(TARGET stream)
+        add_executable(${TARGET} stream.cpp)
+        target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+        target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+    endif ()
+
+    if (WHISPER_BUILD_TESTS)
+        enable_testing()
+        add_subdirectory(tests)
+    endif ()
 endif ()
-
-if (WHISPER_BUILD_EXAMPLES)
-    add_subdirectory(examples)
-endif()
--- a/166
+++ b/166
@ -1,35 +1,16 @@
-ifndef UNAME_S
 UNAME_S := $(shell uname -s)
-endif
-
-ifndef UNAME_P
 UNAME_P := $(shell uname -p)
-endif
-
-ifndef UNAME_M
 UNAME_M := $(shell uname -m)
-endif
-
-# Mac OS + Arm can report x86_64
-# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
-ifeq ($(UNAME_S),Darwin)
-	ifneq ($(UNAME_P),arm)
-		SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
-		ifeq ($(SYSCTL_M),1)
-			# UNAME_P := arm
-			# UNAME_M := arm64
-			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
-		endif
-	endif
-endif

 #
 # Compile flags
 #

-CFLAGS   = -I.              -O3 -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
-LDFLAGS  =
+CFLAGS   = -O3 -std=c11
+CXXFLAGS = -O3 -std=c++11
+
+CFLAGS   += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function

 # OS specific
 # TODO: support Windows
@ -41,88 +22,17 @@ ifeq ($(UNAME_S),Darwin)
 	CFLAGS   += -pthread
 	CXXFLAGS += -pthread
 endif
-ifeq ($(UNAME_S),FreeBSD)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif
-ifeq ($(UNAME_S),Haiku)
-	CFLAGS   += -pthread
-	CXXFLAGS += -pthread
-endif

 # Architecture specific
-# TODO: probably these flags need to be tweaked on some architectures
-#       feel free to update the Makefile for your architecture and send a pull request or issue
-ifeq ($(UNAME_M),x86_64)
-	ifeq ($(UNAME_S),Darwin)
-		CFLAGS += -mfma -mf16c
-		AVX1_M := $(shell sysctl machdep.cpu.features)
-		ifneq (,$(findstring AVX1.0,$(AVX1_M)))
-			CFLAGS += -mavx
-		endif
-		AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
-		ifneq (,$(findstring AVX2,$(AVX2_M)))
-			CFLAGS += -mavx2
-		endif
-	else ifeq ($(UNAME_S),Linux)
-		AVX1_M := $(shell grep "avx " /proc/cpuinfo)
-		ifneq (,$(findstring avx,$(AVX1_M)))
-			CFLAGS += -mavx
-		endif
-		AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
-		ifneq (,$(findstring avx2,$(AVX2_M)))
-			CFLAGS += -mavx2
-		endif
-		FMA_M := $(shell grep "fma " /proc/cpuinfo)
-		ifneq (,$(findstring fma,$(FMA_M)))
-			CFLAGS += -mfma
-		endif
-		F16C_M := $(shell grep "f16c " /proc/cpuinfo)
-		ifneq (,$(findstring f16c,$(F16C_M)))
-			CFLAGS += -mf16c
-		endif
-	else ifeq ($(UNAME_S),Haiku)
-		AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
-		ifneq (,$(findstring avx,$(AVX1_M)))
-			CFLAGS += -mavx
-		endif
-		AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
-		ifneq (,$(findstring avx2,$(AVX2_M)))
-			CFLAGS += -mavx2
-		endif
-		FMA_M := $(shell sysinfo -cpu | grep "FMA ")
-		ifneq (,$(findstring fma,$(FMA_M)))
-			CFLAGS += -mfma
-		endif
-		F16C_M := $(shell sysinfo -cpu | grep "F16C ")
-		ifneq (,$(findstring f16c,$(F16C_M)))
-			CFLAGS += -mf16c
-		endif
-	else
-		CFLAGS += -mfma -mf16c -mavx -mavx2
-	endif
-endif
-ifeq ($(UNAME_M),amd64)
+ifeq ($(UNAME_M),x86_64) # test uname -m: uname -p is non-portable ("unknown" on many Linux systems, "i386" on Intel macOS)
 	CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
-ifndef WHISPER_NO_ACCELERATE
-	# Mac M1 - include Accelerate framework
-	ifeq ($(UNAME_S),Darwin)
-		CFLAGS  += -DGGML_USE_ACCELERATE
-		LDFLAGS += -framework Accelerate
+ifneq ($(filter arm%,$(UNAME_P)),)
+	# Mac M1
+endif
+ifneq ($(filter aarch64%,$(UNAME_P)),)
 	endif
-endif
-ifdef WHISPER_OPENBLAS
-	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
-	LDFLAGS += -lopenblas
-endif
-ifdef WHISPER_GPROF
-	CFLAGS  += -pg
-	CXXFLAGS  += -pg
-endif
-ifneq ($(filter aarch64%,$(UNAME_M)),)
-endif
-ifneq ($(filter armv6%,$(UNAME_M)),)
+	ifneq ($(filter armv6%,$(UNAME_M)),)
 	# Raspberry Pi 1, 2, 3
 	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
 endif
@ -135,26 +45,22 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif

-default: main
+#
+# Build library + main
+#

-#
-# Build library
-#
+main: main.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main
+	./main -h

 ggml.o: ggml.c ggml.h
-	$(CC)  $(CFLAGS)   -c ggml.c -o ggml.o
+	$(CC)  $(CFLAGS)   -c ggml.c

 whisper.o: whisper.cpp whisper.h
-	$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
-
-libwhisper.a: ggml.o whisper.o
-	$(AR) rcs libwhisper.a ggml.o whisper.o
-
-libwhisper.so: ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) -c whisper.cpp

 clean:
-	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
+	rm -f *.o main

 #
 # Examples
@ -162,21 +68,8 @@ clean:

 CC_SDL=`sdl2-config --cflags --libs`

-main: examples/main/main.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o whisper.o -o main $(LDFLAGS)
-	./main -h
-
-stream: examples/stream/stream.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
-
-command: examples/command/command.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
-
-talk: examples/talk/talk.cpp  examples/talk/gpt-2.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
-
-bench: examples/bench/bench.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+stream: stream.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL)

 #
 # Audio samples
@ -213,11 +106,10 @@ samples:
 .PHONY: small
 .PHONY: medium.en
 .PHONY: medium
-.PHONY: large-v1
 .PHONY: large

-tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
-	bash ./models/download-ggml-model.sh $@
+tiny.en tiny base.en base small.en small medium.en medium large: main
+	bash ./download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
 	@echo "Running $@ on all samples in ./samples ..."
@ -225,17 +117,9 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
 	@echo ""
 	@for f in samples/*.wav; do \
 		echo "----------------------------------------------" ; \
-		echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
+		echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
 	    echo "----------------------------------------------" ; \
 		echo "" ; \
 		./main -m models/ggml-$@.bin -f $$f ; \
 		echo "" ; \
 	done
-
-#
-# Tests
-#
-
-.PHONY: tests
-tests:
-	bash ./tests/run-tests.sh
--- a/README.md
+++ b/README.md
@ -2,73 +2,30 @@

 [![Actions Status](https://github.com/ggerganov/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/whisper.cpp/actions)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)

 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

 - Plain C/C++ implementation without dependencies
- Apple silicon first-class citizen - optimized via Arm Neon and Accelerate framework
- AVX intrinsics support for x86 architectures
+- ARM_NEON and AVX intrinsics support
 - Mixed F16 / F32 precision
 - Low memory usage (Flash Attention + Flash Forward)
 - Zero memory allocations at runtime
 - Runs on the CPU
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
+- Supported platforms: Linux, Mac OS (Intel and Arm), Raspberry Pi, Android

-Supported platforms:
+## Usage

- [x] Mac OS (Intel and Arm)
- [x] [iOS](examples/whisper.objc)
- [x] Linux
- [x] [WebAssembly](examples/whisper.wasm)
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
- [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)
-
-The entire implementation of the model is contained in 2 source files:
-
- Tensor operations: [ggml.h](ggml.h) / [ggml.c](ggml.c)
- Transformer inference: [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
-
-Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
-As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
-
-https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
-
-You can also easily make your own offline voice assistant application: [command](examples/command)
-
-https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
-
-Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
-
-## Implementation details
-
- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
- Sample usage is demonstrated in [main.cpp](examples/main)
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
- Various other examples are available in the [examples](examples) folder
-
-The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
-instrisics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
-the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
-
-## Quick start
-
-First, download one of the Whisper models converted in [ggml format](models). For example:
+To build the main program, run `make`. You can then transcribe a `.wav` file like this:

 ```bash
-bash ./models/download-ggml-model.sh base.en
+$ ./main -f input.wav
 ```

-Now build the [main](examples/main) example and transcribe an audio file like this:
+Before running the program, make sure to download one of the ggml Whisper models. For example:

 ```bash
-# build the main example
-make
-
-# transcribe an audio file
-./main -f input.wav
+bash ./download-ggml-model.sh base.en
 ```

 ---
@ -77,40 +34,28 @@ For a quick demo, simply run `make base.en`:

 ```java
 $ make base.en
-
-cc  -I.              -O3 -std=c11   -pthread -DGGML_USE_ACCELERATE   -c ggml.c -o ggml.o
-c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
-c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main  -framework Accelerate
+cc  -O3 -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -pthread   -c ggml.c
+c++ -O3 -std=c++11 -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -pthread -c whisper.cpp
+c++ -O3 -std=c++11 -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -pthread main.cpp whisper.o ggml.o -o main
 ./main -h

 usage: ./main [options] file0.wav file1.wav ...

 options:
-  -h,       --help          [default] show this help message and exit
-  -t N,     --threads N     [4      ] number of threads to use during computation
-  -p N,     --processors N  [1      ] number of processors to use during computation
-  -ot N,    --offset-t N    [0      ] time offset in milliseconds
-  -on N,    --offset-n N    [0      ] segment index offset
-  -d  N,    --duration N    [0      ] duration of audio to process in milliseconds
-  -mc N,    --max-context N [-1     ] maximum number of text context tokens to store
-  -ml N,    --max-len N     [0      ] maximum segment length in characters
-  -wt N,    --word-thold N  [0.01   ] word timestamp probability threshold
-  -su,      --speed-up      [false  ] speed up audio by x2 (reduced accuracy)
-  -tr,      --translate     [false  ] translate from source language to english
-  -otxt,    --output-txt    [false  ] output result in a text file
-  -ovtt,    --output-vtt    [false  ] output result in a vtt file
-  -osrt,    --output-srt    [false  ] output result in a srt file
-  -owts,    --output-words  [false  ] output script for generating karaoke video
-  -ps,      --print-special [false  ] print special tokens
-  -pc,      --print-colors  [false  ] print colors
-  -nt,      --no-timestamps [true   ] do not print timestamps
-  -l LANG,  --language LANG [en     ] spoken language
-  -m FNAME, --model FNAME   [models/ggml-base.en.bin] model path
-  -f FNAME, --file FNAME    [       ] input WAV file path
+  -h,       --help           show this help message and exit
+  -s SEED,  --seed SEED      RNG seed (default: -1)
+  -t N,     --threads N      number of threads to use during computation (default: 4)
+  -v,       --verbose        verbose output
+            --translate      translate from source language to english
+  -ps,      --print_special  print special tokens
+  -nt,      --no_timestamps  do not print timestamps
+  -l LANG,  --language LANG  spoken language (default: en)
+  -m FNAME, --model FNAME    model path (default: models/ggml-base.en.bin)
+  -f FNAME, --file FNAME     input WAV file path

-bash ./models/download-ggml-model.sh base.en
+bash ./download-ggml-model.sh base.en
 Downloading ggml model base.en ...
-ggml-base.en.bin               100%[========================>] 141.11M  6.34MB/s    in 24s
+models/ggml-base.en.bin            100%[===================================>] 141.11M  6.49MB/s    in 23s
 Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
 You can now use it like this:

@ -138,33 +83,30 @@ whisper_model_load: n_text_layer  = 6
 whisper_model_load: n_mels        = 80
 whisper_model_load: f16           = 1
 whisper_model_load: type          = 2
+whisper_model_load: mem_required  = 377.00 MB
 whisper_model_load: adding 1607 extra tokens
-whisper_model_load: mem_required  =  506.00 MB
-whisper_model_load: ggml ctx size =  140.60 MB
-whisper_model_load: memory size   =   22.83 MB
-whisper_model_load: model size    =  140.54 MB
+whisper_model_load: ggml ctx size = 163.43 MB
+whisper_model_load: memory size =    22.83 MB
+whisper_model_load: model size  =   140.54 MB

-system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
+main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, lang = en, task = transcribe, timestamps = 1 ...

-main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
+[00:00.000 --> 00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.


-[00:00:00.000 --> 00:00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
-
-
-whisper_print_timings:     load time =   105.91 ms
-whisper_print_timings:      mel time =    24.62 ms
-whisper_print_timings:   sample time =     3.63 ms
-whisper_print_timings:   encode time =   324.71 ms / 54.12 ms per layer
-whisper_print_timings:   decode time =    83.58 ms / 13.93 ms per layer
-whisper_print_timings:    total time =   542.81 ms
+whisper_print_timings:     load time =    77.48 ms
+whisper_print_timings:      mel time =    26.10 ms
+whisper_print_timings:   sample time =     2.19 ms
+whisper_print_timings:   encode time =   632.95 ms / 105.49 ms per layer
+whisper_print_timings:   decode time =    85.11 ms / 14.18 ms per layer
+whisper_print_timings:    total time =   824.14 ms
 ```

 The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.

 For detailed usage instructions, run: `./main -h`

-Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
+Note that `whisper.cpp` currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
 For example, you can use `ffmpeg` like this:

 ```java
@ -192,42 +134,13 @@ make small.en
 make small
 make medium.en
 make medium
-make large-v1
 make large
 ```

-## Memory usage
-
-| Model  | Disk   | Mem     | SHA                                        |
-| ---    | ---    | ---     | ---                                        |
-| tiny   |  75 MB | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| base   | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| small  | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| large  | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
-
-## Limitations
-
- Inference only
- No GPU support
- Very basic greedy sampling scheme - always pick up the token with highest probability.
-  This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
-  from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
-  to run the python code with the following parameters:
-
-  ```
-  whisper --best_of None --beam_size None ...
-  ```
-
-  In the future, `whisper.cpp` will support more sampling strategies.
-
 ## Another example

 Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
-in about half a minute on a MacBook M1 Pro, using `medium.en` model:
-
-<details>
-  <summary>Expand to see the result</summary>
+in less than a minute on a MacBook M1 Pro, using `medium.en` model:

 ```java
 $ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
@ -245,187 +158,86 @@ whisper_model_load: n_text_layer  = 24
 whisper_model_load: n_mels        = 80
 whisper_model_load: f16           = 1
 whisper_model_load: type          = 4
-whisper_model_load: mem_required  = 2610.00 MB
+whisper_model_load: mem_required  = 2502.00 MB
 whisper_model_load: adding 1607 extra tokens
 whisper_model_load: ggml ctx size = 1644.97 MB
 whisper_model_load: memory size =   182.62 MB
 whisper_model_load: model size  =  1462.12 MB
+log_mel_spectrogram: n_sample = 3179750, n_len = 19873
+log_mel_spectrogram: recording length: 198.734375 s

-main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, lang = en, task = transcribe, timestamps = 1 ...
+main: processing 3179750 samples (198.7 sec), 8 threads, lang = english, task = transcribe, timestamps = 1 ...

 [00:00.000 --> 00:08.000]   My fellow Americans, this day has brought terrible news and great sadness to our country.
-[00:08.000 --> 00:17.000]   At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
-[00:17.000 --> 00:23.000]   A short time later, debris was seen falling from the skies above Texas.
-[00:23.000 --> 00:29.000]   The Columbia's lost. There are no survivors.
+[00:08.000 --> 00:17.000]   At 9 o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
+[00:17.000 --> 00:24.000]   A short time later, debris was seen falling from the skies above Texas.
+[00:24.000 --> 00:29.000]   The Columbia's lost. There are no survivors.
 [00:29.000 --> 00:32.000]   On board was a crew of seven.
-[00:32.000 --> 00:39.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
-[00:39.000 --> 00:48.000]   Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
-[00:48.000 --> 00:52.000]   a colonel in the Israeli Air Force.
+[00:32.000 --> 00:43.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain David Brown, Commander William McCool,
+[00:43.000 --> 00:52.000]   Dr. Kultner Aschavla, and Elon Ramon, a Colonel in the Israeli Air Force.
 [00:52.000 --> 00:58.000]   These men and women assumed great risk in the service to all humanity.
-[00:58.000 --> 01:03.000]   In an age when space flight has come to seem almost routine,
-[01:03.000 --> 01:07.000]   it is easy to overlook the dangers of travel by rocket
-[01:07.000 --> 01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
-[01:12.000 --> 01:18.000]   These astronauts knew the dangers, and they faced them willingly,
-[01:18.000 --> 01:23.000]   knowing they had a high and noble purpose in life.
-[01:23.000 --> 01:31.000]   Because of their courage and daring and idealism, we will miss them all the more.
-[01:31.000 --> 01:36.000]   All Americans today are thinking as well of the families of these men and women
-[01:36.000 --> 01:40.000]   who have been given this sudden shock and grief.
-[01:40.000 --> 01:45.000]   You're not alone. Our entire nation grieves with you,
-[01:45.000 --> 01:52.000]   and those you love will always have the respect and gratitude of this country.
+[00:58.000 --> 01:06.000]   In an age when space flight has come to seem almost routine, it is easy to overlook the dangers of travel by rocket
+[01:06.000 --> 01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
+[01:12.000 --> 01:22.000]   These astronauts knew the dangers, and they faced them willingly, knowing they had a high and noble purpose in life.
+[01:22.000 --> 01:30.000]   Because of their courage, endearing, and idealism, we will miss them all the more.
+[01:30.000 --> 01:40.000]   All Americans today are thinking as well of the families of these men and women who have been given this sudden shock and grief.
+[01:40.000 --> 01:45.000]   You're not alone. Our entire nation agrees with you.
+[01:45.000 --> 01:52.000]   And those you love will always have the respect and gratitude of this country.
 [01:52.000 --> 01:56.000]   The cause in which they died will continue.
-[01:56.000 --> 02:04.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery
-[02:04.000 --> 02:11.000]   and the longing to understand. Our journey into space will go on.
+[01:56.000 --> 02:07.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery and the longing to understand.
+[02:07.000 --> 02:11.000]   Our journey into space will go on.
 [02:11.000 --> 02:16.000]   In the skies today, we saw destruction and tragedy.
 [02:16.000 --> 02:22.000]   Yet farther than we can see, there is comfort and hope.
-[02:22.000 --> 02:29.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
-[02:29.000 --> 02:35.000]   who created all these. He who brings out the starry hosts one by one
-[02:35.000 --> 02:39.000]   and calls them each by name."
-[02:39.000 --> 02:46.000]   Because of His great power and mighty strength, not one of them is missing.
-[02:46.000 --> 02:55.000]   The same Creator who names the stars also knows the names of the seven souls we mourn today.
-[02:55.000 --> 03:01.000]   The crew of the shuttle Columbia did not return safely to earth,
-[03:01.000 --> 03:05.000]   yet we can pray that all are safely home.
-[03:05.000 --> 03:13.000]   May God bless the grieving families, and may God continue to bless America.
-[03:13.000 --> 03:41.000]   Audio
+[02:22.000 --> 02:31.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens who created all these.
+[02:31.000 --> 02:39.000]   He who brings out the starry hosts one by one and calls them each by name."
+[02:39.000 --> 02:46.000]   Because of his great power and mighty strength, not one of them is missing.
+[02:46.000 --> 02:55.000]   The same creator who names the stars also knows the names of the seven souls we mourn today.
+[02:55.000 --> 03:05.000]   The crew of the shuttle Columbia did not return safely to Earth, yet we can pray that all are safely home.
+[03:05.000 --> 03:14.000]   May God bless the grieving families and may God continue to bless America.
+[03:14.000 --> 03:24.000]   [Music]


-whisper_print_timings:     load time =   575.92 ms
-whisper_print_timings:      mel time =   230.60 ms
-whisper_print_timings:   sample time =    73.19 ms
-whisper_print_timings:   encode time = 19552.61 ms / 814.69 ms per layer
-whisper_print_timings:   decode time = 13249.96 ms / 552.08 ms per layer
-whisper_print_timings:    total time = 33686.27 ms
+main:     load time =   522.18 ms
+main:      mel time =   423.43 ms
+main:   sample time =    31.42 ms
+main:   encode time = 41518.51 ms / 1729.94 ms per layer
+main:   decode time = 14907.22 ms
+main:    total time = 57416.63 ms
 ```
-</details>

 ## Real-time audio input example

 This is a naive example of performing real-time inference on audio from your microphone.
-The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continously.
-More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
+The `stream` tool samples the audio every 3 seconds and runs the transcription continuously. More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).

 ```java
-./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
+$ ./stream -m models/ggml-small.en.bin -t 8
 ```

-https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
+https://user-images.githubusercontent.com/1991296/193465125-c163d304-64f6-4f5d-83e5-72239c9a203e.mp4

-## Confidence color-coding
+## Implementation details

-Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
-to highlight words with high or low confidence:
+- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
+- The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
+- Simple usage is demonstrated in [main.cpp](main.cpp)
+- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](stream.cpp)

-<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
+## Limitations

-## Controlling the length of the generated text segments (experimental)
+- Very basic greedy sampling scheme - always picks the top token. You can implement your own strategy.
+- Inference only
+- No GPU support

-For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: 
+## Memory usage

-```java
-./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
-
-whisper_model_load: loading model from './models/ggml-base.en.bin'
-...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
-
-main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00:00.000 --> 00:00:00.850]   And so my
-[00:00:00.850 --> 00:00:01.590]   fellow
-[00:00:01.590 --> 00:00:04.140]   Americans, ask
-[00:00:04.140 --> 00:00:05.660]   not what your
-[00:00:05.660 --> 00:00:06.840]   country can do
-[00:00:06.840 --> 00:00:08.430]   for you, ask
-[00:00:08.430 --> 00:00:09.440]   what you can do
-[00:00:09.440 --> 00:00:10.020]   for your
-[00:00:10.020 --> 00:00:11.000]   country.
-```
-
-## Word-level timestamp
-
-The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
-
-```java
-./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
-
-whisper_model_load: loading model from './models/ggml-base.en.bin'
-...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
-
-main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00:00.000 --> 00:00:00.320]  
-[00:00:00.320 --> 00:00:00.370]   And
-[00:00:00.370 --> 00:00:00.690]   so
-[00:00:00.690 --> 00:00:00.850]   my
-[00:00:00.850 --> 00:00:01.590]   fellow
-[00:00:01.590 --> 00:00:02.850]   Americans
-[00:00:02.850 --> 00:00:03.300]  ,
-[00:00:03.300 --> 00:00:04.140]   ask
-[00:00:04.140 --> 00:00:04.990]   not
-[00:00:04.990 --> 00:00:05.410]   what
-[00:00:05.410 --> 00:00:05.660]   your
-[00:00:05.660 --> 00:00:06.260]   country
-[00:00:06.260 --> 00:00:06.600]   can
-[00:00:06.600 --> 00:00:06.840]   do
-[00:00:06.840 --> 00:00:07.010]   for
-[00:00:07.010 --> 00:00:08.170]   you
-[00:00:08.170 --> 00:00:08.190]  ,
-[00:00:08.190 --> 00:00:08.430]   ask
-[00:00:08.430 --> 00:00:08.910]   what
-[00:00:08.910 --> 00:00:09.040]   you
-[00:00:09.040 --> 00:00:09.320]   can
-[00:00:09.320 --> 00:00:09.440]   do
-[00:00:09.440 --> 00:00:09.760]   for
-[00:00:09.760 --> 00:00:10.020]   your
-[00:00:10.020 --> 00:00:10.510]   country
-[00:00:10.510 --> 00:00:11.000]  .
-```
-
-## Karaoke-style movie generation (experimental)
-
-The [main](examples/main) example provides support for output of karaoke-style movies, where the
-currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
-This requires to have `ffmpeg` installed.
-
-Here are a few *"typical"* examples:
-
-```java
-./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
-source ./samples/jfk.wav.wts
-ffplay ./samples/jfk.wav.mp4
-```
-
-https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
-
---
-
-```java
-./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
-source ./samples/mm0.wav.wts
-ffplay ./samples/mm0.wav.mp4
-```
-
-https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
-
---
-
-```java
-./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
-source ./samples/gb0.wav.wts
-ffplay ./samples/gb0.wav.mp4
-```
-
-https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
-
---
-
-## Benchmarks
-
-In order to have an objective comparison of the performance of the inference across different system configurations,
-use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
-took to execute it. The results are summarized in the following Github issue:
-
-[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
+| Model  | Disk   | Mem     |
+| ---    | ---    | ---     |
+| tiny   |  75 MB | ~240 MB |
+| base   | 142 MB | ~380 MB |
+| small  | 466 MB | ~970 MB |
+| medium | 1.5 GB | ~2.5 GB |
+| large  | 2.9 GB | ~4.6 GB |

 ## ggml format

@ -436,43 +248,6 @@ The original models are converted to a custom binary format. This allows to pack
 - vocabulary
 - weights

-You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
-or manually from here:
+You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script.

- https://huggingface.co/datasets/ggerganov/whisper.cpp
- https://ggml.ggerganov.com
-
-For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
-in [models](models).
-
-## Bindings
-
- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs)
- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm)
- [X] Javascript: [bindings/javascript](bindings/javascript)
- [ ] Python: soon
-
-## Examples
-
-There are various examples of using the library for different projects in the [examples](examples) folder.
-Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
-
-| Example | Web | Description |
-| ---     | --- | ---         |
-| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
-| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
-| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
-| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
-| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
-| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
-| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
-| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
-| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
-| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
-
-## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
-
-If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
-You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category
-to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
-[Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion.
+For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) or the README in [models](models).
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@ -1,19 +0,0 @@
-if (EMSCRIPTEN)
-    add_subdirectory(javascript)
-
-    add_custom_command(
-        OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
-        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
-        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
-        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
-        COMMAND npm publish
-        COMMAND touch publish.log
-        COMMENT "Publishing npm module v${PROJECT_VERSION}"
-        VERBATIM
-        )
-
-    add_custom_target(publish-npm
-        DEPENDS javascript/publish.log
-        )
-endif()
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/javascript/.gitignore
+++ b/bindings/javascript/.gitignore
@ -1 +0,0 @@
-publish.log
--- a/bindings/javascript/CMakeLists.txt
+++ b/bindings/javascript/CMakeLists.txt
@ -1,41 +0,0 @@
-set(TARGET libwhisper)
-
-add_executable(${TARGET}
-    emscripten.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    )
-
-unset(EXTRA_FLAGS)
-
-if (WHISPER_WASM_SINGLE_FILE)
-    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
-    message(STATUS "Embedding WASM inside whisper.js")
-
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        ${CMAKE_BINARY_DIR}/bin/libwhisper.js
-        ${CMAKE_CURRENT_SOURCE_DIR}/whisper.js
-        )
-
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        ${CMAKE_BINARY_DIR}/bin/libwhisper.worker.js
-        ${CMAKE_CURRENT_SOURCE_DIR}/libwhisper.worker.js
-        )
-endif()
-
-set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
-    --bind \
-    -s MODULARIZE=1 \
-    -s EXPORT_NAME=\"'whisper_factory'\" \
-    -s FORCE_FILESYSTEM=1 \
-    -s USE_PTHREADS=1 \
-    -s PTHREAD_POOL_SIZE=8 \
-    -s ALLOW_MEMORY_GROWTH=1 \
-    ${EXTRA_FLAGS} \
-    ")
--- a/bindings/javascript/README.md
+++ b/bindings/javascript/README.md
@ -1,78 +0,0 @@
-# whisper.cpp
-
-Node.js package for Whisper speech recognition
-
-Package: https://www.npmjs.com/package/whisper.cpp
-
-## Details
-
-The performance is comparable to when running `whisper.cpp` in the browser via WASM.
-
-The API is currently very rudimentary: [bindings/javascript/emscripten.cpp](/bindings/javascript/emscripten.cpp)
-
-For sample usage check [tests/test-whisper.js](/tests/test-whisper.js)
-
-## Package building + test
-
-```bash
-# load emscripten
-source /path/to/emsdk/emsdk_env.sh
-
-# clone repo
-git clone https://github.com/ggerganov/whisper.cpp
-cd whisper.cpp
-
-# grab base.en model
-./models/download-ggml-model.sh base.en
-
-# prepare PCM sample for testing
-ffmpeg -i samples/jfk.wav -f f32le -acodec pcm_f32le samples/jfk.pcmf32
-
-# build
-mkdir build-em && cd build-em
-emcmake cmake .. && make -j
-
-# run test
-node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
-
-# publish npm package
-make publish-npm
-```
-
-## Sample run
-
-```java
-$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
-
-whisper_model_load: loading model from 'whisper.bin'
-whisper_model_load: n_vocab       = 51864
-whisper_model_load: n_audio_ctx   = 1500
-whisper_model_load: n_audio_state = 512
-whisper_model_load: n_audio_head  = 8
-whisper_model_load: n_audio_layer = 6
-whisper_model_load: n_text_ctx    = 448
-whisper_model_load: n_text_state  = 512
-whisper_model_load: n_text_head   = 8
-whisper_model_load: n_text_layer  = 6
-whisper_model_load: n_mels        = 80
-whisper_model_load: f16           = 1
-whisper_model_load: type          = 2
-whisper_model_load: adding 1607 extra tokens
-whisper_model_load: mem_required  =  506.00 MB
-whisper_model_load: ggml ctx size =  140.60 MB
-whisper_model_load: memory size   =   22.83 MB
-whisper_model_load: model size    =  140.54 MB
-
-system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 | 
-
-operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
-
-[00:00:00.000 --> 00:00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
-
-whisper_print_timings:     load time =   162.37 ms
-whisper_print_timings:      mel time =   183.70 ms
-whisper_print_timings:   sample time =     4.27 ms
-whisper_print_timings:   encode time =  8582.63 ms / 1430.44 ms per layer
-whisper_print_timings:   decode time =   436.16 ms / 72.69 ms per layer
-whisper_print_timings:    total time =  9370.90 ms
-```
--- a/bindings/javascript/emscripten.cpp
+++ b/bindings/javascript/emscripten.cpp
@ -1,93 +0,0 @@
-//
-// This is the Javascript API of whisper.cpp
-//
-// Very crude at the moment.
-// Feel free to contribute and make this better!
-//
-// See the tests/test-whisper.js for sample usage
-//
-
-#include "whisper.h"
-
-#include <emscripten.h>
-#include <emscripten/bind.h>
-
-#include <thread>
-#include <vector>
-
-struct whisper_context * g_context;
-
-EMSCRIPTEN_BINDINGS(whisper) {
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        if (g_context == nullptr) {
-            g_context = whisper_init(path_model.c_str());
-            if (g_context != nullptr) {
-                return true;
-            } else {
-                return false;
-            }
-        }
-
-        return false;
-    }));
-
-    emscripten::function("free", emscripten::optional_override([]() {
-        if (g_context) {
-            whisper_free(g_context);
-            g_context = nullptr;
-        }
-    }));
-
-    emscripten::function("full_default", emscripten::optional_override([](const emscripten::val & audio, const std::string & lang, bool translate) {
-        if (g_context == nullptr) {
-            return -1;
-        }
-
-        struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
-
-        params.print_realtime   = true;
-        params.print_progress   = false;
-        params.print_timestamps = true;
-        params.print_special    = false;
-        params.translate        = translate;
-        params.language         = whisper_is_multilingual(g_context) ? lang.c_str() : "en";
-        params.n_threads        = std::min(8, (int) std::thread::hardware_concurrency());
-        params.offset_ms        = 0;
-
-        std::vector<float> pcmf32;
-        const int n = audio["length"].as<int>();
-
-        emscripten::val heap = emscripten::val::module_property("HEAPU8");
-        emscripten::val memory = heap["buffer"];
-
-        pcmf32.resize(n);
-
-        emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
-        memoryView.call<void>("set", audio);
-
-        // print system information
-        {
-            printf("\n");
-            printf("system_info: n_threads = %d / %d | %s\n",
-                    params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
-
-            printf("\n");
-            printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
-                    __func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
-                    params.n_threads, 1,
-                    params.language,
-                    params.translate ? "translate" : "transcribe");
-
-            printf("\n");
-        }
-
-        // run whisper
-        {
-            whisper_reset_timings(g_context);
-            whisper_full(g_context, params, pcmf32.data(), pcmf32.size());
-            whisper_print_timings(g_context);
-        }
-
-        return 0;
-    }));
-}
--- a/bindings/javascript/libwhisper.worker.js
+++ b/bindings/javascript/libwhisper.worker.js
@ -1 +0,0 @@
-"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
--- a/bindings/javascript/package-tmpl.json
+++ b/bindings/javascript/package-tmpl.json
@ -1,26 +0,0 @@
-{
-  "name": "whisper.cpp",
-  "version": "@PROJECT_VERSION@",
-  "description": "Whisper speech recognition",
-  "main": "whisper.js",
-  "scripts": {
-    "test": "echo \"todo: add tests\" && exit 0"
-  },
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/ggerganov/whisper.cpp"
-  },
-  "keywords": [
-    "openai",
-    "whisper",
-    "speech-to-text",
-    "speech-recognition",
-    "transformer"
-  ],
-  "author": "Georgi Gerganov",
-  "license": "MIT",
-  "bugs": {
-    "url": "https://github.com/ggerganov/whisper.cpp/issues"
-  },
-  "homepage": "https://github.com/ggerganov/whisper.cpp#readme"
-}
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,26 +0,0 @@
-{
-  "name": "whisper.cpp",
-  "version": "1.0.3",
-  "description": "Whisper speech recognition",
-  "main": "whisper.js",
-  "scripts": {
-    "test": "echo \"todo: add tests\" && exit 0"
-  },
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/ggerganov/whisper.cpp"
-  },
-  "keywords": [
-    "openai",
-    "whisper",
-    "speech-to-text",
-    "speech-recognition",
-    "transformer"
-  ],
-  "author": "Georgi Gerganov",
-  "license": "MIT",
-  "bugs": {
-    "url": "https://github.com/ggerganov/whisper.cpp/issues"
-  },
-  "homepage": "https://github.com/ggerganov/whisper.cpp#readme"
-}
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/cmake/BuildTypes.cmake
+++ b/cmake/BuildTypes.cmake
@ -1,54 +0,0 @@
-# Add new build types
-
-# ReleaseGG - Release with enabled asserts
-
-SET(CMAKE_CXX_FLAGS_RELEASEGG
-    "-O3"
-    CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
-    FORCE )
-SET(CMAKE_C_FLAGS_RELEASEGG
-    "-O3"
-    CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
-    FORCE )
-SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
-    ""
-    CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
-    FORCE )
-SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
-    ""
-    CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
-    FORCE )
-MARK_AS_ADVANCED(
-    CMAKE_CXX_FLAGS_RELEASEGG
-    CMAKE_C_FLAGS_RELEASEGG
-    CMAKE_EXE_LINKER_FLAGS_RELEASEGG
-    CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )
-
-# RelWithDebInfoGG - RelWithDebInfo with enabled asserts
-
-SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
-    "-O2 -g"
-    CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
-    FORCE )
-SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG
-    "-O2 -g"
-    CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
-    FORCE )
-SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
-    ""
-    CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
-    FORCE )
-SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
-    ""
-    CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
-    FORCE )
-MARK_AS_ADVANCED(
-    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
-    CMAKE_C_FLAGS_RELWITHDEBINFOGG
-    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
-    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )
-
-if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
-    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
-endif()
--- a/cmake/GitVars.cmake
+++ b/cmake/GitVars.cmake
@ -1,22 +0,0 @@
-find_package(Git)
-
-# the commit's SHA1
-execute_process(COMMAND
-    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
-    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
-    OUTPUT_VARIABLE GIT_SHA1
-    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-# the date of the commit
-execute_process(COMMAND
-    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
-    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
-    OUTPUT_VARIABLE GIT_DATE
-    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-# the subject of the commit
-execute_process(COMMAND
-    "${GIT_EXECUTABLE}" log -1 --format=%s
-    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
-    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
-    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
--- a/models/convert-pt-to-ggml.py
+++ b/models/convert-pt-to-ggml.py
@ -40,131 +40,131 @@ import code
 import torch
 import numpy as np

-#from transformers import GPTJForCausalLM
-#from transformers import GPT2TokenizerFast
+from transformers import GPTJForCausalLM
+from transformers import GPT2TokenizerFast

 # ref: https://github.com/openai/whisper/blob/8cf36f3508c9acd341a45eb2364239a3d81458b9/whisper/tokenizer.py#L10-L110
-#LANGUAGES = {
-#    "en": "english",
-#    "zh": "chinese",
-#    "de": "german",
-#    "es": "spanish",
-#    "ru": "russian",
-#    "ko": "korean",
-#    "fr": "french",
-#    "ja": "japanese",
-#    "pt": "portuguese",
-#    "tr": "turkish",
-#    "pl": "polish",
-#    "ca": "catalan",
-#    "nl": "dutch",
-#    "ar": "arabic",
-#    "sv": "swedish",
-#    "it": "italian",
-#    "id": "indonesian",
-#    "hi": "hindi",
-#    "fi": "finnish",
-#    "vi": "vietnamese",
-#    "iw": "hebrew",
-#    "uk": "ukrainian",
-#    "el": "greek",
-#    "ms": "malay",
-#    "cs": "czech",
-#    "ro": "romanian",
-#    "da": "danish",
-#    "hu": "hungarian",
-#    "ta": "tamil",
-#    "no": "norwegian",
-#    "th": "thai",
-#    "ur": "urdu",
-#    "hr": "croatian",
-#    "bg": "bulgarian",
-#    "lt": "lithuanian",
-#    "la": "latin",
-#    "mi": "maori",
-#    "ml": "malayalam",
-#    "cy": "welsh",
-#    "sk": "slovak",
-#    "te": "telugu",
-#    "fa": "persian",
-#    "lv": "latvian",
-#    "bn": "bengali",
-#    "sr": "serbian",
-#    "az": "azerbaijani",
-#    "sl": "slovenian",
-#    "kn": "kannada",
-#    "et": "estonian",
-#    "mk": "macedonian",
-#    "br": "breton",
-#    "eu": "basque",
-#    "is": "icelandic",
-#    "hy": "armenian",
-#    "ne": "nepali",
-#    "mn": "mongolian",
-#    "bs": "bosnian",
-#    "kk": "kazakh",
-#    "sq": "albanian",
-#    "sw": "swahili",
-#    "gl": "galician",
-#    "mr": "marathi",
-#    "pa": "punjabi",
-#    "si": "sinhala",
-#    "km": "khmer",
-#    "sn": "shona",
-#    "yo": "yoruba",
-#    "so": "somali",
-#    "af": "afrikaans",
-#    "oc": "occitan",
-#    "ka": "georgian",
-#    "be": "belarusian",
-#    "tg": "tajik",
-#    "sd": "sindhi",
-#    "gu": "gujarati",
-#    "am": "amharic",
-#    "yi": "yiddish",
-#    "lo": "lao",
-#    "uz": "uzbek",
-#    "fo": "faroese",
-#    "ht": "haitian creole",
-#    "ps": "pashto",
-#    "tk": "turkmen",
-#    "nn": "nynorsk",
-#    "mt": "maltese",
-#    "sa": "sanskrit",
-#    "lb": "luxembourgish",
-#    "my": "myanmar",
-#    "bo": "tibetan",
-#    "tl": "tagalog",
-#    "mg": "malagasy",
-#    "as": "assamese",
-#    "tt": "tatar",
-#    "haw": "hawaiian",
-#    "ln": "lingala",
-#    "ha": "hausa",
-#    "ba": "bashkir",
-#    "jw": "javanese",
-#    "su": "sundanese",
-#}
+LANGUAGES = {
+    "en": "english",
+    "zh": "chinese",
+    "de": "german",
+    "es": "spanish",
+    "ru": "russian",
+    "ko": "korean",
+    "fr": "french",
+    "ja": "japanese",
+    "pt": "portuguese",
+    "tr": "turkish",
+    "pl": "polish",
+    "ca": "catalan",
+    "nl": "dutch",
+    "ar": "arabic",
+    "sv": "swedish",
+    "it": "italian",
+    "id": "indonesian",
+    "hi": "hindi",
+    "fi": "finnish",
+    "vi": "vietnamese",
+    "iw": "hebrew",
+    "uk": "ukrainian",
+    "el": "greek",
+    "ms": "malay",
+    "cs": "czech",
+    "ro": "romanian",
+    "da": "danish",
+    "hu": "hungarian",
+    "ta": "tamil",
+    "no": "norwegian",
+    "th": "thai",
+    "ur": "urdu",
+    "hr": "croatian",
+    "bg": "bulgarian",
+    "lt": "lithuanian",
+    "la": "latin",
+    "mi": "maori",
+    "ml": "malayalam",
+    "cy": "welsh",
+    "sk": "slovak",
+    "te": "telugu",
+    "fa": "persian",
+    "lv": "latvian",
+    "bn": "bengali",
+    "sr": "serbian",
+    "az": "azerbaijani",
+    "sl": "slovenian",
+    "kn": "kannada",
+    "et": "estonian",
+    "mk": "macedonian",
+    "br": "breton",
+    "eu": "basque",
+    "is": "icelandic",
+    "hy": "armenian",
+    "ne": "nepali",
+    "mn": "mongolian",
+    "bs": "bosnian",
+    "kk": "kazakh",
+    "sq": "albanian",
+    "sw": "swahili",
+    "gl": "galician",
+    "mr": "marathi",
+    "pa": "punjabi",
+    "si": "sinhala",
+    "km": "khmer",
+    "sn": "shona",
+    "yo": "yoruba",
+    "so": "somali",
+    "af": "afrikaans",
+    "oc": "occitan",
+    "ka": "georgian",
+    "be": "belarusian",
+    "tg": "tajik",
+    "sd": "sindhi",
+    "gu": "gujarati",
+    "am": "amharic",
+    "yi": "yiddish",
+    "lo": "lao",
+    "uz": "uzbek",
+    "fo": "faroese",
+    "ht": "haitian creole",
+    "ps": "pashto",
+    "tk": "turkmen",
+    "nn": "nynorsk",
+    "mt": "maltese",
+    "sa": "sanskrit",
+    "lb": "luxembourgish",
+    "my": "myanmar",
+    "bo": "tibetan",
+    "tl": "tagalog",
+    "mg": "malagasy",
+    "as": "assamese",
+    "tt": "tatar",
+    "haw": "hawaiian",
+    "ln": "lingala",
+    "ha": "hausa",
+    "ba": "bashkir",
+    "jw": "javanese",
+    "su": "sundanese",
+}

-## ref: https://github.com/openai/whisper/blob/8cf36f3508c9acd341a45eb2364239a3d81458b9/whisper/tokenizer.py#L273-L292
-#def build_tokenizer(path_to_whisper_repo: str, name: str = "gpt2"):
-#    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-#    path = os.path.join(path_to_whisper_repo, "whisper/assets", name)
-#    tokenizer = GPT2TokenizerFast.from_pretrained(path)
-#
-#    specials = [
-#        "<|startoftranscript|>",
-#        *[f"<|{lang}|>" for lang in LANGUAGES.keys()],
-#        "<|translate|>",
-#        "<|transcribe|>",
-#        "<|startoflm|>",
-#        "<|startofprev|>",
-#        "<|nocaptions|>",
-#        "<|notimestamps|>",
-#    ]
-#
-#    tokenizer.add_special_tokens(dict(additional_special_tokens=specials))
-#    return tokenizer
+# ref: https://github.com/openai/whisper/blob/8cf36f3508c9acd341a45eb2364239a3d81458b9/whisper/tokenizer.py#L273-L292
+def build_tokenizer(path_to_whisper_repo: str, name: str = "gpt2"):
+    """Load a tokenizer from the Whisper repo assets and register the special tokens.
+
+    Args:
+        path_to_whisper_repo: root of a checkout of the openai/whisper repository.
+        name: sub-directory of "whisper/assets" holding the tokenizer files.
+
+    Returns:
+        The GPT2TokenizerFast instance with the additional special tokens added.
+    """
+    # set before the tokenizer is created, same as the reference implementation
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+    assets_dir = os.path.join(path_to_whisper_repo, "whisper/assets", name)
+    tok = GPT2TokenizerFast.from_pretrained(assets_dir)
+
+    # one tag per language, in LANGUAGES order, between the start tag and the task tags
+    language_tags = [f"<|{code}|>" for code in LANGUAGES]
+    task_tags = [
+        "<|translate|>",
+        "<|transcribe|>",
+        "<|startoflm|>",
+        "<|startofprev|>",
+        "<|nocaptions|>",
+        "<|notimestamps|>",
+    ]
+    extra_tokens = ["<|startoftranscript|>"] + language_tags + task_tags
+
+    tok.add_special_tokens({"additional_special_tokens": extra_tokens})
+    return tok

 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
 def bytes_to_unicode():
@ -224,17 +224,17 @@ with np.load(os.path.join(dir_whisper, "whisper/assets", "mel_filters.npz")) as
 #code.interact(local=locals())

 multilingual = hparams["n_vocab"] == 51865
-dir_tokenizer = os.path.join(dir_whisper, "whisper/assets", multilingual and "multilingual" or "gpt2")
+tokenizer = build_tokenizer(dir_whisper, multilingual and "multilingual" or "gpt2")

-#tokenizer = build_tokenizer(dir_whisper, multilingual and "multilingual" or "gpt2")
 #print(tokenizer)
 #print(tokenizer.name_or_path)
 #print(len(tokenizer.additional_special_tokens))
+dir_tokenizer = tokenizer.name_or_path

 # output in the same directory as the model
 fname_out = dir_out + "/ggml-model.bin"

-with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f:
+with open(dir_tokenizer + "/vocab.json", "r") as f:
    tokens = json.load(f)

 # use 16-bit or 32-bit floats
@ -271,7 +271,7 @@ byte_decoder = {v:k for k, v in byte_encoder.items()}
 fout.write(struct.pack("i", len(tokens)))

 for key in tokens:
-    text = bytearray([byte_decoder[c] for c in key])
+    text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8')
    fout.write(struct.pack("i", len(text)))
    fout.write(text)

@ -297,6 +297,8 @@ for name in list_vars.keys():
                name == "encoder.conv2.bias"   or \
                name == "encoder.positional_embedding" or \
                name == "decoder.positional_embedding":
+            ftype = 0
+            data = data.astype(np.float32)
            print("  Converting to float32")
            data = data.astype(np.float32)
            ftype = 0
--- a/models/download-ggml-model.sh
+++ b/models/download-ggml-model.sh
@ -3,26 +3,10 @@
 # This script downloads Whisper model files that have already been converted to ggml format.
 # This way you don't have to convert them yourself.

-#src="https://ggml.ggerganov.com"
-#pfx="ggml-model-whisper"
-
-src="https://huggingface.co/datasets/ggerganov/whisper.cpp"
-pfx="resolve/main/ggml"
-
-# get the path of this script
-function get_script_path() {
-    # Print the absolute directory that contains this script.
-    # Uses realpath when it is available, otherwise falls back to cd + pwd -P.
-    # All expansions are quoted so paths containing spaces are handled correctly.
-    if [ -x "$(command -v realpath)" ]; then
-        dirname "$(realpath "$0")"
-    else
-        local ret
-        ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
-        echo "$ret"
-    fi
-}
-
-models_path=$(get_script_path)
+ggml_path=$(dirname $(realpath $0))

 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )

 # list available models
 function list_models {
@ -52,24 +36,16 @@ fi

 # download ggml model

-printf "Downloading ggml model $model from '$src' ...\n"
+printf "Downloading ggml model $model ...\n"

-cd $models_path
+mkdir -p models

-if [ -f "ggml-$model.bin" ]; then
+if [ -f "models/ggml-$model.bin" ]; then
    printf "Model $model already exists. Skipping download.\n"
    exit 0
 fi

-if [ -x "$(command -v wget)" ]; then
-    wget --quiet --show-progress -O ggml-$model.bin $src/$pfx-$model.bin
-elif [ -x "$(command -v curl)" ]; then
-    curl -L --output ggml-$model.bin $src/$pfx-$model.bin
-else
-    printf "Either wget or curl is required to download models.\n"
-    exit 1
-fi
-
+wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin

 if [ $? -ne 0 ]; then
    printf "Failed to download ggml model $model \n"
--- a/examples/dr_wav.h
+++ b/examples/dr_wav.h
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -1,33 +0,0 @@
-# dependencies
-
-find_package(Threads REQUIRED)
-
-# third-party
-
-if (WHISPER_SUPPORT_SDL2)
-    # SDL2
-    find_package(SDL2 REQUIRED)
-
-    string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
-
-    message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
-    message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
-endif()
-
-# examples
-
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
-if (EMSCRIPTEN)
-    add_subdirectory(whisper.wasm)
-    add_subdirectory(stream.wasm)
-    add_subdirectory(command.wasm)
-    add_subdirectory(talk.wasm)
-    add_subdirectory(bench.wasm)
-else()
-    add_subdirectory(main)
-    add_subdirectory(stream)
-    add_subdirectory(command)
-    add_subdirectory(bench)
-    add_subdirectory(talk)
-endif()
--- a/examples/bench.wasm/CMakeLists.txt
+++ b/examples/bench.wasm/CMakeLists.txt
@ -1,47 +0,0 @@
-#
-# libbench
-#
-# Emscripten build of the benchmark (emscripten.cpp linked against whisper).
-
-set(TARGET libbench)
-
-add_executable(${TARGET}
-    emscripten.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    )
-
-unset(EXTRA_FLAGS)
-
-if (WHISPER_WASM_SINGLE_FILE)
-    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
-    message(STATUS "Embedding WASM inside bench.js")
-
-    # copy the generated module next to the page under the name the page loads (bench.js)
-    # VERBATIM keeps the argument escaping identical on every platform/generator
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        ${CMAKE_BINARY_DIR}/bin/libbench.js
-        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/bench.wasm/bench.js
-        VERBATIM
-        )
-endif()
-
-# Emscripten link options: embind (--bind), pthreads with a pool of 8 workers,
-# a 1024MB heap, filesystem support and the runtime methods exported to JS
-set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
-    --bind \
-    -s USE_PTHREADS=1 \
-    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1024MB \
-    -s TOTAL_MEMORY=1024MB \
-    -s FORCE_FILESYSTEM=1 \
-    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
-    ${EXTRA_FLAGS} \
-    ")
-
-#
-# bench.wasm
-#
-# Static page assets: @VAR@ placeholders in the templates are substituted at configure time.
-
-set(TARGET bench.wasm)
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
--- a/examples/bench.wasm/README.md
+++ b/examples/bench.wasm/README.md
@ -1,22 +0,0 @@
-# bench.wasm
-
-Benchmark the performance of whisper.cpp in the browser using WebAssembly
-
-Link: https://whisper.ggerganov.com/bench/
-
-Terminal version: [examples/bench](/examples/bench)
-
-## Build instructions
-
-```bash
-# build using Emscripten (v3.1.2)
-git clone https://github.com/ggerganov/whisper.cpp
-cd whisper.cpp
-mkdir build-em && cd build-em
-emcmake cmake ..
-make -j
-
-# copy the produced page to your HTTP path
-cp bin/bench.wasm/*       /path/to/html/
-cp bin/libbench.worker.js /path/to/html/
-```
--- a/examples/bench.wasm/emscripten.cpp
+++ b/examples/bench.wasm/emscripten.cpp
@ -1,80 +0,0 @@
-#include "whisper.h"
-
-#include <emscripten.h>
-#include <emscripten/bind.h>
-
-#include <cmath>
-#include <string>
-#include <thread>
-#include <vector>
-
-constexpr int N_THREAD = 8;
-
-// TODO: get rid of this vector of contexts - bad idea in the first place
-std::vector<struct whisper_context *> g_contexts(4, nullptr);
-
-std::thread g_worker;
-
-// Run the encoder benchmark on the context stored in slot `index` of g_contexts
-// and print the timings to stderr. Executed on the worker thread started by "init".
-void bench_main(size_t index) {
-    // hardware_concurrency() may return 0 when the value is unknown - use at least one thread
-    int n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
-    if (n_threads < 1) {
-        n_threads = 1;
-    }
-
-    // whisper context
-    auto & ctx = g_contexts[index];
-
-    fprintf(stderr, "%s: running benchmark with %d threads - please wait...\n", __func__, n_threads);
-
-    const int ret_mel = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL);
-    if (ret_mel != 0) {
-        fprintf(stderr, "error: failed to set mel: %d\n", ret_mel);
-        return;
-    }
-
-    // keep the call and the comparison separate: `int ret = f() != 0` would store
-    // the result of the comparison instead of the actual error code
-    const int ret_encode = whisper_encode(ctx, 0, n_threads);
-    if (ret_encode != 0) {
-        fprintf(stderr, "error: failed to encode model: %d\n", ret_encode);
-        return;
-    }
-
-    whisper_print_timings(ctx);
-
-    fprintf(stderr, "\n");
-    fprintf(stderr, "If you wish, you can submit these results here:\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "  https://github.com/ggerganov/whisper.cpp/issues/89\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "Please include the following information:\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "  - CPU model\n");
-    fprintf(stderr, "  - Operating system\n");
-    fprintf(stderr, "  - Browser\n");
-    fprintf(stderr, "\n");
-}
-
-// JS bindings:
-//   init(path_model) -> 1-based handle of the created context, 0 on failure
-//   free(handle)     -> releases the context that init() returned the handle for
-EMSCRIPTEN_BINDINGS(bench) {
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        for (size_t i = 0; i < g_contexts.size(); ++i) {
-            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init(path_model.c_str());
-                if (g_contexts[i] != nullptr) {
-                    // wait for a previous benchmark before starting a new one
-                    if (g_worker.joinable()) {
-                        g_worker.join();
-                    }
-                    g_worker = std::thread([i]() {
-                        bench_main(i);
-                    });
-
-                    // handles are 1-based so that 0 can signal failure
-                    return i + 1;
-                } else {
-                    return (size_t) 0;
-                }
-            }
-        }
-
-        return (size_t) 0;
-    }));
-
-    emscripten::function("free", emscripten::optional_override([](size_t index) {
-        // `index` is the 1-based handle returned by "init" - 0 is the failure value
-        if (index == 0 || index > g_contexts.size()) {
-            return;
-        }
-
-        // the benchmark thread may still be using the context - let it finish first
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        auto & ctx = g_contexts[index - 1];
-        if (ctx != nullptr) {
-            whisper_free(ctx);
-            ctx = nullptr;
-        }
-    }));
-}
--- a/examples/bench.wasm/index-tmpl.html
+++ b/examples/bench.wasm/index-tmpl.html
@ -1,227 +0,0 @@
-<!doctype html>
-<html lang="en-us">
-    <head>
-        <title>bench : Benchmark whisper.cpp performance in the browser</title>
-
-        <style>
-            #output {
-                width: 100%;
-                height: 100%;
-                margin: 0 auto;
-                margin-top: 10px;
-                border-left: 0px;
-                border-right: 0px;
-                padding-left: 0px;
-                padding-right: 0px;
-                display: block;
-                background-color: black;
-                color: white;
-                font-size: 10px;
-                font-family: 'Lucida Console', Monaco, monospace;
-                outline: none;
-                white-space: pre;
-                overflow-wrap: normal;
-                overflow-x: scroll;
-            }
-        </style>
-    </head>
-    <body>
-        <div id="main-container">
-            <b>bench : Benchmark whisper.cpp performance in the browser</b>
-
-            <br><br>
-
-            You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/bench.wasm">GitHub</a>.
-
-            <br><br>
-
-            <hr>
-
-            Select the model you would like to use and click the "Bench" button.<br>
-            The results will be displayed in the textarea below.
-
-            <br><br>
-
-            <div id="model-whisper">
-                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <span id="fetch-whisper-progress"></span>
-
-                <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-            </div>
-
-            <br>
-
-            <div id="input">
-                <button id="bench" onclick="onBench()" disabled>Bench</button>
-                <button id="clear" onclick="clearCache()">Clear Cache</button>
-            </div>
-
-            <hr>
-
-            Debug output:
-            <textarea id="output" rows="20"></textarea>
-
-            <br>
-
-            <b>Troubleshooting</b>
-
-            <br><br>
-
-            The page does some heavy computations, so make sure:
-
-            <ul>
-                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
-                <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
-                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
-            </ul>
-
-            <div class="cell-version">
-                <span>
-                    |
-                    Build time: <span class="nav-link">@GIT_DATE@</span> |
-                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
-                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
-                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/bench.wasm">Source Code</a> |
-                </span>
-            </div>
-        </div>
-
-        <script type="text/javascript" src="helpers.js"></script>
-        <script type='text/javascript'>
-            // the bench instance
-            var instance = null;
-
-            // model name
-            var model_whisper = null;
-
-            var Module = {
-                print: printTextarea,
-                printErr: printTextarea,
-                setStatus: function(text) {
-                    printTextarea('js: ' + text);
-                },
-                monitorRunDependencies: function(left) {
-                },
-                preRun: function() {
-                    printTextarea('js: Preparing ...');
-                },
-                postRun: function() {
-                    printTextarea('js: Initialized successfully!');
-                }
-            };
-
-            //
-            // fetch models
-            //
-
-            let dbVersion = 1
-            let dbName    = 'whisper.ggerganov.com';
-            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
-
-            function storeFS(fname, buf) {
-                // write to WASM file using FS_createDataFile
-                // if the file exists, delete it
-                try {
-                    Module.FS_unlink(fname);
-                } catch (e) {
-                    // ignore
-                }
-
-                Module.FS_createDataFile("/", fname, buf, true, true);
-
-                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
-
-                model_whisper = fname;
-
-                document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
-
-                if (model_whisper != null) {
-                    document.getElementById('bench').disabled = false;
-                }
-            }
-
-            function loadFile(event, fname) {
-                var file = event.target.files[0] || null;
-                if (file == null) {
-                    return;
-                }
-
-                printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
-                printTextarea('loadFile: please wait ...');
-
-                var reader = new FileReader();
-                reader.onload = function(event) {
-                    var buf = new Uint8Array(reader.result);
-                    storeFS(fname, buf);
-                }
-                reader.readAsArrayBuffer(file);
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('whisper-file'         ).style.display = 'none';
-                document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
-            }
-
-            function loadWhisper(model) {
-                let urls = {
-                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                };
-
-                let sizes = {
-                    'tiny.en': 75,
-                    'base.en': 142,
-                };
-
-                let url     = urls[model];
-                let dst     = 'whisper.bin';
-                let size_mb = sizes[model];
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';
-
-                cbProgress = function(p) {
-                    let el = document.getElementById('fetch-whisper-progress');
-                    el.innerHTML = Math.round(100*p) + '%';
-                };
-
-                cbCancel = function() {
-                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status');  if (el) el.innerHTML = '';
-                };
-
-                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-            }
-
-            //
-            // main
-            //
-
-            function onBench() {
-                if (instance) {
-                    Module.free(instance);
-                }
-
-                instance = Module.init('whisper.bin');
-
-                if (instance) {
-                    printTextarea("js: whisper initialized, instance: " + instance);
-                }
-
-                document.getElementById('bench').disabled = true;
-
-                if (!instance) {
-                    printTextarea("js: failed to initialize whisper");
-                    return;
-                }
-            }
-
-        </script>
-        <script type="text/javascript" src="bench.js"></script>
-    </body>
-</html>
--- a/examples/bench/CMakeLists.txt
+++ b/examples/bench/CMakeLists.txt
@ -1,3 +0,0 @@
-# bench : command-line benchmark tool
-set(TARGET bench)
-add_executable(${TARGET} bench.cpp)
-# Threads::Threads is the imported target created by find_package(Threads REQUIRED)
-# in examples/CMakeLists.txt - it carries the required compile and link options
-target_link_libraries(${TARGET} PRIVATE whisper Threads::Threads)
--- a/examples/bench/README.md
+++ b/examples/bench/README.md
@ -1,54 +0,0 @@
-# bench
-
-A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of
-the transformer on some random audio data and records the execution time. This way we can have an objective comparison
-of the performance of the model for various setups.
-
-Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89
-
-```bash
-# build the bench tool
-$ make bench
-
-# run it on the small.en model using 4 threads
-$ ./bench -m ./models/ggml-small.en.bin -t 4
-
-whisper_model_load: loading model from './models/ggml-small.en.bin'
-whisper_model_load: n_vocab       = 51864
-whisper_model_load: n_audio_ctx   = 1500
-whisper_model_load: n_audio_state = 768
-whisper_model_load: n_audio_head  = 12
-whisper_model_load: n_audio_layer = 12
-whisper_model_load: n_text_ctx    = 448
-whisper_model_load: n_text_state  = 768
-whisper_model_load: n_text_head   = 12
-whisper_model_load: n_text_layer  = 12
-whisper_model_load: n_mels        = 80
-whisper_model_load: f16           = 1
-whisper_model_load: type          = 3
-whisper_model_load: mem_required  = 1048.00 MB
-whisper_model_load: adding 1607 extra tokens
-whisper_model_load: ggml ctx size = 533.05 MB
-whisper_model_load: memory size =    68.48 MB 
-whisper_model_load: model size  =   464.44 MB
-
-whisper_print_timings:     load time =   240.82 ms
-whisper_print_timings:      mel time =     0.00 ms
-whisper_print_timings:   sample time =     0.00 ms
-whisper_print_timings:   encode time =  1062.21 ms / 88.52 ms per layer
-whisper_print_timings:   decode time =     0.00 ms / 0.00 ms per layer
-whisper_print_timings:    total time =  1303.04 ms
-
-system_info: n_threads = 4 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
-
-If you wish, you can submit these results here:
-
-  https://github.com/ggerganov/whisper.cpp/issues/89
-
-Please include the following information:
-
-  - CPU model
-  - Operating system
-  - Compiler
-
-```
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -1,94 +0,0 @@
-#include "whisper.h"
-
-#include <cstdio>
-#include <string>
-#include <thread>
-
-// command-line parameters
-struct whisper_params {
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-
-    std::string model = "models/ggml-base.en.bin";
-};
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
-
-// Parse the command-line arguments into `params`.
-//
-// Returns true on success and false when an option is missing its value.
-// Prints the usage and exits the process for -h/--help and for unknown arguments.
-bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
-    for (int i = 1; i < argc; i++) {
-        std::string arg = argv[i];
-
-        if (arg == "-h" || arg == "--help") {
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-
-        const bool is_threads = arg == "-t" || arg == "--threads";
-        const bool is_model   = arg == "-m" || arg == "--model";
-
-        if (is_threads || is_model) {
-            // the option needs a value - do not read past the end of argv
-            if (i + 1 >= argc) {
-                fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
-                whisper_print_usage(argc, argv, params);
-                return false;
-            }
-
-            const char * value = argv[++i];
-
-            if (is_threads) {
-                params.n_threads = std::stoi(value);
-            } else {
-                params.model = value;
-            }
-        } else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-    }
-
-    return true;
-}
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options]\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help        [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N   [%-7d] number of threads to use during computation\n", params.n_threads);
-    fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n",                                  params.model.c_str());
-    fprintf(stderr, "\n");
-}
-
-// Entry point: load the model, run the encoder once and print the timings.
-//
-// Exit codes: 0 success, 1 bad arguments, 2 model failed to load,
-//             3 failed to set the mel input, 4 encoder failed.
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    // whisper init
-
-    struct whisper_context * ctx = whisper_init(params.model.c_str());
-
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
-    }
-
-    if (ctx == nullptr) {
-        fprintf(stderr, "error: failed to initialize whisper context\n");
-        return 2;
-    }
-
-    const int ret_mel = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL);
-    if (ret_mel != 0) {
-        fprintf(stderr, "error: failed to set mel: %d\n", ret_mel);
-        whisper_free(ctx);
-        return 3;
-    }
-
-    // keep the call and the comparison separate: `int ret = f() != 0` would store
-    // the result of the comparison instead of the actual error code
-    const int ret_encode = whisper_encode(ctx, 0, params.n_threads);
-    if (ret_encode != 0) {
-        fprintf(stderr, "error: failed to encode model: %d\n", ret_encode);
-        whisper_free(ctx);
-        return 4;
-    }
-
-    whisper_print_timings(ctx);
-    whisper_free(ctx);
-
-    fprintf(stderr, "\n");
-    fprintf(stderr, "If you wish, you can submit these results here:\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "  https://github.com/ggerganov/whisper.cpp/issues/89\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "Please include the following information:\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "  - CPU model\n");
-    fprintf(stderr, "  - Operating system\n");
-    fprintf(stderr, "  - Compiler\n");
-    fprintf(stderr, "\n");
-
-    return 0;
-}
--- a/examples/command.wasm/CMakeLists.txt
+++ b/examples/command.wasm/CMakeLists.txt
@ -1,47 +0,0 @@
-#
-# libcommand
-#
-# Emscripten build of the voice-command example (emscripten.cpp linked against whisper).
-
-set(TARGET libcommand)
-
-add_executable(${TARGET}
-    emscripten.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    )
-
-unset(EXTRA_FLAGS)
-
-if (WHISPER_WASM_SINGLE_FILE)
-    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
-    message(STATUS "Embedding WASM inside command.js")
-
-    # copy the generated module next to the page as command.js
-    # VERBATIM keeps the argument escaping identical on every platform/generator
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        ${CMAKE_BINARY_DIR}/bin/libcommand.js
-        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/command.wasm/command.js
-        VERBATIM
-        )
-endif()
-
-# Emscripten link options: embind (--bind), pthreads with a pool of 8 workers,
-# a 1024MB heap, filesystem support and the runtime methods exported to JS
-set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
-    --bind \
-    -s USE_PTHREADS=1 \
-    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1024MB \
-    -s TOTAL_MEMORY=1024MB \
-    -s FORCE_FILESYSTEM=1 \
-    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
-    ${EXTRA_FLAGS} \
-    ")
-
-#
-# command.wasm
-#
-# Static page assets: @VAR@ placeholders in the templates are substituted at configure time.
-
-set(TARGET command.wasm)
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
--- a/examples/command.wasm/README.md
+++ b/examples/command.wasm/README.md
@ -1,23 +0,0 @@
-# command.wasm
-
-This is a basic Voice Assistant example that accepts voice commands from the microphone.
-It runs fully in the browser via WebAssembly.
-
-Online demo: https://whisper.ggerganov.com/command/
-
-Terminal version: [examples/command](/examples/command)
-
-## Build instructions
-
-```bash
-# build using Emscripten (v3.1.2)
-git clone https://github.com/ggerganov/whisper.cpp
-cd whisper.cpp
-mkdir build-em && cd build-em
-emcmake cmake ..
-make -j
-
-# copy the produced page to your HTTP path
-cp bin/command.wasm/*       /path/to/html/
-cp bin/libcommand.worker.js /path/to/html/
-```
--- a/examples/command.wasm/emscripten.cpp
+++ b/examples/command.wasm/emscripten.cpp
@ -1,408 +0,0 @@
-#include "ggml.h"
-#include "whisper.h"
-
-#include <emscripten.h>
-#include <emscripten/bind.h>
-
-#include <atomic>
-#include <cmath>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <vector>
-#include <regex>
-
-// upper bound on the thread count handed to whisper (see command_main)
-constexpr int N_THREAD = 8;
-
-// model slots; a null entry is a free slot
-std::vector<struct whisper_context *> g_contexts(4, nullptr);
-
-// g_mutex is taken around accesses to the status / transcription strings
-std::mutex  g_mutex;
-std::thread g_worker;
-
-// polled by the worker loop; set to false to ask it to exit
-std::atomic<bool> g_running(false);
-
-std::string g_status;
-std::string g_status_forced;
-std::string g_transcribed;
-
-// audio samples most recently pushed from JavaScript via set_audio
-std::vector<float> g_pcmf32;
-
-// Returns a copy of `s` with the leading and trailing whitespace runs removed.
-static std::string trim(const std::string & s) {
-    const std::regex edge_ws("^\\s+|\\s+$");
-
-    return std::regex_replace(s, edge_ws, std::string());
-}
-
-// Filters `data` in place using a coefficient derived from `cutoff` (Hz) and
-// `sample_rate` (Hz). The first sample is left untouched.
-//
-// An empty buffer is returned unchanged (previously data[0] was read
-// unconditionally, which is undefined behaviour for an empty vector).
-static void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
-    if (data.empty()) {
-        return;
-    }
-
-    const float rc = 1.0f / (2.0f * M_PI * cutoff);
-    const float dt = 1.0f / sample_rate;
-    const float alpha = dt / (rc + dt);
-
-    float y = data[0];
-
-    for (size_t i = 1; i < data.size(); i++) {
-        // NOTE(review): for i >= 2, data[i - 1] already holds the previous
-        // output (it was overwritten one iteration ago), not the raw input.
-        // Kept as-is because the VAD thresholds may depend on this exact response.
-        y = alpha * (y + data[i] - data[i - 1]);
-        data[i] = y;
-    }
-}
-
-// Compute the similarity between two strings from their Levenshtein distance:
-//
-//   similarity = 1 - distance / max(len(s0), len(s1))
-//
-// Returns 1.0 for identical strings (including two empty strings) and 0.0
-// when every position has to be edited.
-static float similarity(const std::string & s0, const std::string & s1) {
-    const size_t len0 = s0.size() + 1;
-    const size_t len1 = s1.size() + 1;
-
-    // two empty strings are identical; also avoids the 0/0 division below
-    if (len0 == 1 && len1 == 1) {
-        return 1.0f;
-    }
-
-    std::vector<int> col(len1, 0);
-    std::vector<int> prevCol(len1, 0);
-
-    // row 0: distance from the empty prefix of s0 to each prefix of s1
-    for (size_t j = 0; j < len1; j++) {
-        prevCol[j] = j;
-    }
-
-    // rows 1..len0-1; starting at 1 keeps s0[i - 1] inside the string
-    // (starting at 0 read s0[-1], which is out of bounds)
-    for (size_t i = 1; i < len0; i++) {
-        col[0] = i;
-        for (size_t j = 1; j < len1; j++) {
-            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
-        }
-        col.swap(prevCol);
-    }
-
-    // after the final swap the last computed row lives in prevCol
-    const float dist = prevCol[len1 - 1];
-
-    return 1.0f - (dist / std::max(s0.size(), s1.size()));
-}
-
-// Replace the status text (g_status) while holding g_mutex.
-void command_set_status(const std::string & status) {
-    const std::lock_guard<std::mutex> guard(g_mutex);
-
-    g_status = status;
-}
-
-// Energy-based check on the tail of `pcmf32`.
-//
-// Compares the mean absolute amplitude of the last `last_ms` milliseconds with
-// the mean over the whole buffer. Returns true when the tail is NOT louder than
-// `vad_thold` times the overall mean, false otherwise or when the buffer does
-// not contain more than `last_ms` of audio. When `freq_thold` is positive the
-// buffer is first filtered in place with high_pass_filter.
-bool command_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
-    const int total = pcmf32.size();
-    const int tail  = (sample_rate * last_ms) / 1000;
-
-    // not enough samples - assume no speech
-    if (tail >= total) {
-        return false;
-    }
-
-    if (freq_thold > 0.0f) {
-        high_pass_filter(pcmf32, freq_thold, sample_rate);
-    }
-
-    float energy_all  = 0.0f;
-    float energy_last = 0.0f;
-
-    const int tail_begin = total - tail;
-
-    for (int k = 0; k < total; ++k) {
-        const float mag = fabsf(pcmf32[k]);
-
-        energy_all += mag;
-
-        if (k >= tail_begin) {
-            energy_last += mag;
-        }
-    }
-
-    energy_all  /= total;
-    energy_last /= tail;
-
-    if (verbose) {
-        fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
-    }
-
-    return !(energy_last > vad_thold*energy_all);
-}
-
-// Run whisper_full on `pcmf32` and return the concatenated text of all segments.
-//
-// Outputs:
-//   prob - mean of the per-token `p` values over all segments (0 if there are no tokens)
-//   t_ms - wall-clock duration of the call in milliseconds
-//
-// Returns an empty string (with prob = 0 and t_ms = 0) when whisper_full reports failure.
-std::string command_transcribe(whisper_context * ctx, const whisper_full_params & wparams, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
-    using clock = std::chrono::high_resolution_clock;
-
-    const auto t_begin = clock::now();
-
-    prob = 0.0f;
-    t_ms = 0;
-
-    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
-        return "";
-    }
-
-    std::string text_all;
-    int n_prob = 0;
-
-    const int n_seg = whisper_full_n_segments(ctx);
-    for (int s = 0; s < n_seg; ++s) {
-        text_all += whisper_full_get_segment_text(ctx, s);
-
-        const int n_tok = whisper_full_n_tokens(ctx, s);
-        for (int t = 0; t < n_tok; ++t) {
-            prob += whisper_full_get_token_data(ctx, s, t).p;
-            ++n_prob;
-        }
-    }
-
-    if (n_prob > 0) {
-        prob /= n_prob;
-    }
-
-    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - t_begin).count();
-
-    return text_all;
-}
-
-// Copy the most recent `ms` milliseconds of captured audio into `audio`.
-// If fewer samples are available, everything that is buffered is returned.
-//
-// g_pcmf32 is resized and filled by set_audio under g_mutex, so the same mutex
-// is held here while reading it; the caller must not already hold g_mutex.
-void command_get_audio(int ms, int sample_rate, std::vector<float> & audio) {
-    const int64_t n_samples = (ms * sample_rate) / 1000;
-
-    std::lock_guard<std::mutex> lock(g_mutex);
-
-    int64_t n_take = 0;
-    if (g_pcmf32.size() < n_samples) {
-        n_take = g_pcmf32.size();
-    } else {
-        n_take = n_samples;
-    }
-
-    audio.resize(n_take);
-    std::copy(g_pcmf32.end() - n_take, g_pcmf32.end(), audio.begin());
-}
-
-// Worker loop for the context stored in g_contexts[index].
-//
-// Phase 1: ask the user to say k_prompt and keep retrying until the
-//          transcription is close enough to it; the prompt audio is saved.
-// Phase 2: on every detected utterance, transcribe "prompt audio + new audio",
-//          strip the part that best matches the prompt and publish the rest as
-//          the recognized command (g_transcribed + status text).
-//
-// Runs until g_running becomes false, then frees the whisper context.
-void command_main(size_t index) {
-    command_set_status("loading data ...");
-
-    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
-
-    wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
-    wparams.offset_ms        = 0;
-    wparams.translate        = false;
-    wparams.no_context       = true;
-    wparams.single_segment   = true;
-    wparams.print_realtime   = false;
-    wparams.print_progress   = false;
-    wparams.print_timestamps = true;
-    wparams.print_special    = false;
-
-    wparams.max_tokens       = 32;
-    wparams.audio_ctx        = 768; // partial encoder context for better performance
-
-    wparams.language         = "en";
-
-    printf("command: using %d threads\n", wparams.n_threads);
-
-    bool is_running   = true;
-    bool have_prompt  = false;
-    bool ask_prompt   = true;
-    bool print_energy = false;
-
-    float prob0 = 0.0f;
-    float prob  = 0.0f;
-
-    std::vector<float> pcmf32_cur;
-    std::vector<float> pcmf32_prompt;
-
-    const std::string k_prompt = "Ok Whisper, start listening for commands.";
-
-    // whisper context
-    auto & ctx = g_contexts[index];
-
-    const int32_t vad_ms     = 2000;
-    const int32_t prompt_ms  = 5000;
-    const int32_t command_ms = 4000;
-
-    const float vad_thold  = 0.1f;
-    const float freq_thold = -1.0f;
-
-    while (g_running) {
-        // delay
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-        if (ask_prompt) {
-            fprintf(stdout, "\n");
-            fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
-            fprintf(stdout, "\n");
-
-            {
-                char msg[1024];
-                snprintf(msg, sizeof(msg), "Say the following phrase: '%s'", k_prompt.c_str());
-                command_set_status(msg);
-            }
-
-            ask_prompt = false;
-        }
-
-        int64_t t_ms = 0;
-
-        {
-            command_get_audio(vad_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);
-
-            if (command_vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) {
-                fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
-                command_set_status("Speech detected! Processing ...");
-
-                if (!have_prompt) {
-                    command_get_audio(prompt_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);
-
-                    const auto txt = ::trim(::command_transcribe(ctx, wparams, pcmf32_cur, prob0, t_ms));
-
-                    fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
-
-                    const float sim = similarity(txt, k_prompt);
-
-                    // accept only if both the length and the edit distance are within 20% of the prompt
-                    if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
-                        fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
-                        ask_prompt = true;
-                    } else {
-                        fprintf(stdout, "\n");
-                        fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
-                        fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
-                        fprintf(stdout, "\n");
-
-                        {
-                            char msg[1024];
-                            snprintf(msg, sizeof(msg), "Success! Waiting for voice commands ...");
-                            command_set_status(msg);
-                        }
-
-                        // save the audio for the prompt
-                        pcmf32_prompt = pcmf32_cur;
-                        have_prompt = true;
-                    }
-                } else {
-                    command_get_audio(command_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);
-
-                    // prepend the prompt audio
-                    pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
-
-                    const auto txt = ::trim(::command_transcribe(ctx, wparams, pcmf32_cur, prob, t_ms));
-
-                    prob = 100.0f*(prob - prob0);
-
-                    fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
-
-                    // find the prompt in the text
-                    float best_sim = 0.0f;
-                    size_t best_len = 0;
-                    for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
-                        const auto prompt = txt.substr(0, n);
-
-                        const float sim = similarity(prompt, k_prompt);
-
-                        //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
-
-                        if (sim > best_sim) {
-                            best_sim = sim;
-                            best_len = n;
-                        }
-                    }
-
-                    // best_len can exceed txt.size() when the transcription is shorter than
-                    // the prompt window; substr(pos) throws std::out_of_range for pos > size(),
-                    // so clamp it (an empty command is the result in that case)
-                    const std::string command = ::trim(txt.substr(std::min(best_len, txt.size())));
-
-                    fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
-                    fprintf(stdout, "\n");
-
-                    {
-                        char msg[1024];
-                        snprintf(msg, sizeof(msg), "Command '%s', (t = %d ms)", command.c_str(), (int) t_ms);
-                        command_set_status(msg);
-                    }
-                    {
-                        std::lock_guard<std::mutex> lock(g_mutex);
-                        g_transcribed = command;
-                    }
-                }
-
-                {
-                    // set_audio resizes/fills g_pcmf32 under g_mutex from another thread
-                    std::lock_guard<std::mutex> lock(g_mutex);
-                    g_pcmf32.clear();
-                }
-            }
-        }
-    }
-
-    if (index < g_contexts.size()) {
-        whisper_free(g_contexts[index]);
-        g_contexts[index] = nullptr;
-    }
-}
-
-// JavaScript-facing API of the module:
-//   init(path_model)        -> 1-based instance id, or 0 on failure / no free slot
-//   free(index)             -> asks the worker loop to stop (index is not used)
-//   set_audio(index, audio) -> 0 on success, -1 bad index, -2 slot not initialized
-//   get_transcribed()       -> last recognized command (consumed by the call)
-//   get_status()            -> forced status if set, otherwise the worker status
-//   set_status(status)      -> sets the forced status ("" removes the override)
-EMSCRIPTEN_BINDINGS(command) {
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        // load the model into the first free slot and start the worker for it
-        for (size_t i = 0; i < g_contexts.size(); ++i) {
-            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init(path_model.c_str());
-                if (g_contexts[i] != nullptr) {
-                    g_running = true;
-                    if (g_worker.joinable()) {
-                        g_worker.join();
-                    }
-                    g_worker = std::thread([i]() {
-                        command_main(i);
-                    });
-
-                    return i + 1;
-                } else {
-                    return (size_t) 0;
-                }
-            }
-        }
-
-        return (size_t) 0;
-    }));
-
-    emscripten::function("free", emscripten::optional_override([](size_t index) {
-        if (g_running) {
-            g_running = false;
-        }
-    }));
-
-    emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
-        // ids handed out by init are 1-based; 0 wraps around and is rejected below
-        --index;
-
-        if (index >= g_contexts.size()) {
-            return -1;
-        }
-
-        if (g_contexts[index] == nullptr) {
-            return -2;
-        }
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            const int n = audio["length"].as<int>();
-
-            emscripten::val heap = emscripten::val::module_property("HEAPU8");
-            emscripten::val memory = heap["buffer"];
-
-            g_pcmf32.resize(n);
-
-            // build a typed-array view of the same type as `audio` over g_pcmf32's
-            // storage in the WASM heap and let JavaScript copy the samples into it
-            emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
-            memoryView.call<void>("set", audio);
-        }
-
-        return 0;
-    }));
-
-    emscripten::function("get_transcribed", emscripten::optional_override([]() {
-        std::string transcribed;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            transcribed = std::move(g_transcribed);
-            // a moved-from std::string is only "valid but unspecified";
-            // clear it so the next call reliably returns an empty string
-            g_transcribed.clear();
-        }
-
-        return transcribed;
-    }));
-
-    emscripten::function("get_status", emscripten::optional_override([]() {
-        std::string status;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            status = g_status_forced.empty() ? g_status : g_status_forced;
-        }
-
-        return status;
-    }));
-
-    emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            g_status_forced = status;
-        }
-    }));
-}
--- a/examples/command.wasm/index-tmpl.html
+++ b/examples/command.wasm/index-tmpl.html
@ -1,386 +0,0 @@
-<!doctype html>
-<html lang="en-us">
-    <head>
-        <title>command : Voice assistant example using Whisper + WebAssembly</title>
-
-        <style>
-            #output {
-                width: 100%;
-                height: 100%;
-                margin: 0 auto;
-                margin-top: 10px;
-                border-left: 0px;
-                border-right: 0px;
-                padding-left: 0px;
-                padding-right: 0px;
-                display: block;
-                background-color: black;
-                color: white;
-                font-size: 10px;
-                font-family: 'Lucida Console', Monaco, monospace;
-                outline: none;
-                white-space: pre;
-                overflow-wrap: normal;
-                overflow-x: scroll;
-            }
-        </style>
-    </head>
-    <body>
-        <div id="main-container">
-            <b>command : Voice assistant example using Whisper + WebAssembly</b>
-
-            <br><br>
-
-            You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">GitHub</a>.
-
-            <br><br>
-
-            <hr>
-
-            Select the model you would like to use, click the "Start" button and follow the instructions.
-
-            <br><br>
-
-            <div id="model-whisper">
-                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <span id="fetch-whisper-progress"></span>
-
-                <!--
-                    <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-                -->
-            </div>
-
-            <br>
-
-            <div id="input">
-                <button id="start" onclick="onStart()" disabled>Start</button>
-                <button id="stop"  onclick="onStop()" disabled>Stop</button>
-                <button id="clear" onclick="clearCache()">Clear Cache</button>
-            </div>
-
-            <br>
-
-            <div id="state">
-                Status: <b><span id="state-status">not started</span></b>
-
-                <pre id="state-transcribed">[The recognized voice commands will be displayed here]</pre>
-            </div>
-
-            <hr>
-
-            Debug output:
-            <textarea id="output" rows="20"></textarea>
-
-            <br>
-
-            <b>Troubleshooting</b>
-
-            <br><br>
-
-            The page does some heavy computations, so make sure:
-
-            <ul>
-                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
-                <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
-                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
-            </ul>
-
-            <div class="cell-version">
-                <span>
-                    |
-                    Build time: <span class="nav-link">@GIT_DATE@</span> |
-                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
-                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
-                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">Source Code</a> |
-                </span>
-            </div>
-        </div>
-
-        <script type="text/javascript" src="helpers.js"></script>
-        <script type='text/javascript'>
-            // web audio context
-            var context = null;
-
-            // audio data
-            var audio = null;
-            var audio0 = null;
-
-            // the command instance
-            var instance = null;
-
-            // model name
-            var model_whisper = null;
-
-            var Module = {
-                print: printTextarea,
-                printErr: printTextarea,
-                setStatus: function(text) {
-                    printTextarea('js: ' + text);
-                },
-                monitorRunDependencies: function(left) {
-                },
-                preRun: function() {
-                    printTextarea('js: Preparing ...');
-                },
-                postRun: function() {
-                    printTextarea('js: Initialized successfully!');
-                }
-            };
-
-            //
-            // fetch models
-            //
-
-            let dbVersion = 1
-            let dbName    = 'whisper.ggerganov.com';
-            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
-
-            function storeFS(fname, buf) {
-                // write to WASM file using FS_createDataFile
-                // if the file exists, delete it
-                try {
-                    Module.FS_unlink(fname);
-                } catch (e) {
-                    // ignore
-                }
-
-                Module.FS_createDataFile("/", fname, buf, true, true);
-
-                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
-
-                document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
-
-                if (model_whisper != null) {
-                    document.getElementById('start').disabled = false;
-                    document.getElementById('stop' ).disabled = true;
-                }
-            }
-
-            function loadWhisper(model) {
-                let urls = {
-                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                };
-
-                let sizes = {
-                    'tiny.en': 75,
-                    'base.en': 142,
-                };
-
-                let url     = urls[model];
-                let dst     = 'whisper.bin';
-                let size_mb = sizes[model];
-
-                model_whisper = model;
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';
-
-                cbProgress = function(p) {
-                    let el = document.getElementById('fetch-whisper-progress');
-                    el.innerHTML = Math.round(100*p) + '%';
-                };
-
-                cbCancel = function() {
-                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status');  if (el) el.innerHTML = '';
-                };
-
-                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-            }
-
-            //
-            // microphone
-            //
-
-            // sample rate requested for the AudioContext (Hz)
-            const kSampleRate = 16000;
-            // once a decoded recording exceeds this many seconds, recording is restarted
-            const kRestartRecording_s = 120;
-            const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
-
-            // active MediaRecorder, or null while not recording
-            var mediaRecorder = null;
-            // true between startRecording() and stopRecording()
-            var doRecording = false;
-            var startTime = 0;
-
-            // fall back to the webkit-prefixed constructors where needed
-            window.AudioContext = window.AudioContext || window.webkitAudioContext;
-            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
-
-            function stopRecording() {
-                Module.set_status("paused");
-                doRecording = false;
-                audio0 = null;
-                audio = null;
-                context = null;
-            }
-
-            function startRecording() {
-                if (!context) {
-                    context = new AudioContext({
-                        sampleRate: kSampleRate,
-                        channelCount: 1,
-                        echoCancellation: false,
-                        autoGainControl:  true,
-                        noiseSuppression: true,
-                    });
-                }
-
-                Module.set_status("");
-
-                document.getElementById('start').disabled = true;
-                document.getElementById('stop').disabled = false;
-
-                doRecording = true;
-                startTime = Date.now();
-
-                var chunks = [];
-                var stream = null;
-
-                navigator.mediaDevices.getUserMedia({audio: true, video: false})
-                    .then(function(s) {
-                        stream = s;
-                        mediaRecorder = new MediaRecorder(stream);
-                        mediaRecorder.ondataavailable = function(e) {
-                            chunks.push(e.data);
-
-                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
-                            var reader = new FileReader();
-
-                            reader.onload = function(event) {
-                                var buf = new Uint8Array(reader.result);
-
-                                if (!context) {
-                                    return;
-                                }
-                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
-                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
-                                    var source = offlineContext.createBufferSource();
-                                    source.buffer = audioBuffer;
-                                    source.connect(offlineContext.destination);
-                                    source.start(0);
-
-                                    offlineContext.startRendering().then(function(renderedBuffer) {
-                                        audio = renderedBuffer.getChannelData(0);
-
-                                        //printTextarea('js: audio recorded, size: ' + audio.length + ', old size: ' + (audio0 == null ? 0 : audio0.length));
-
-                                        var audioAll = new Float32Array(audio0 == null ? audio.length : audio0.length + audio.length);
-                                        if (audio0 != null) {
-                                            audioAll.set(audio0, 0);
-                                        }
-                                        audioAll.set(audio, audio0 == null ? 0 : audio0.length);
-
-                                        if (instance) {
-                                            Module.set_audio(instance, audioAll);
-                                        }
-                                    });
-                                }, function(e) {
-                                    audio = null;
-                                });
-                            }
-
-                            reader.readAsArrayBuffer(blob);
-                        };
-
-                        mediaRecorder.onstop = function(e) {
-                            if (doRecording) {
-                                setTimeout(function() {
-                                    startRecording();
-                                });
-                            }
-                        };
-
-                        mediaRecorder.start(kIntervalAudio_ms);
-                    })
-                    .catch(function(err) {
-                        printTextarea('js: error getting audio stream: ' + err);
-                    });
-
-                var interval = setInterval(function() {
-                    if (!doRecording) {
-                        clearInterval(interval);
-                        mediaRecorder.stop();
-                        stream.getTracks().forEach(function(track) {
-                            track.stop();
-                        });
-
-                        document.getElementById('start').disabled = false;
-                        document.getElementById('stop').disabled  = true;
-
-                        mediaRecorder = null;
-                    }
-
-                    // if audio length is more than kRestartRecording_s seconds, restart recording
-                    if (audio != null && audio.length > kSampleRate*kRestartRecording_s) {
-                        if (doRecording) {
-                            //printTextarea('js: restarting recording');
-
-                            clearInterval(interval);
-                            audio0 = audio;
-                            audio = null;
-                            mediaRecorder.stop();
-                            stream.getTracks().forEach(function(track) {
-                                track.stop();
-                            });
-                        }
-                    }
-                }, 100);
-            }
-
-            //
-            // main
-            //
-
-            var nLines = 0;
-            var intervalUpdate = null;
-            var transcribedAll = '';
-
-            function onStart() {
-                if (!instance) {
-                    instance = Module.init('whisper.bin');
-
-                    if (instance) {
-                        printTextarea("js: whisper initialized, instance: " + instance);
-                    }
-                }
-
-                if (!instance) {
-                    printTextarea("js: failed to initialize whisper");
-                    return;
-                }
-
-                startRecording();
-
-                intervalUpdate = setInterval(function() {
-                    var transcribed = Module.get_transcribed();
-
-                    if (transcribed != null && transcribed.length > 1) {
-                        transcribedAll += transcribed + '<br>';
-                        nLines++;
-
-                        // if more than 10 lines, remove the first line
-                        if (nLines > 10) {
-                            var i = transcribedAll.indexOf('<br>');
-                            if (i > 0) {
-                                transcribedAll = transcribedAll.substring(i + 4);
-                                nLines--;
-                            }
-                        }
-                    }
-
-                    document.getElementById('state-status').innerHTML = Module.get_status();
-                    document.getElementById('state-transcribed').innerHTML = transcribedAll;
-                }, 100);
-            }
-
-            // "Stop" button handler: delegates to stopRecording()
-            function onStop() {
-                stopRecording();
-            }
-
-        </script>
-        <script type="text/javascript" src="command.js"></script>
-    </body>
-</html>
--- a/examples/command/CMakeLists.txt
+++ b/examples/command/CMakeLists.txt
@ -1,7 +0,0 @@
-# command : only configured when WHISPER_SUPPORT_SDL2 is true
-if (WHISPER_SUPPORT_SDL2)
-    set(TARGET command)
-
-    add_executable(${TARGET}
-        command.cpp
-        )
-
-    target_include_directories(${TARGET} PRIVATE
-        ${SDL2_INCLUDE_DIRS}
-        )
-
-    target_link_libraries(${TARGET} PRIVATE
-        whisper
-        ${SDL2_LIBRARIES}
-        ${CMAKE_THREAD_LIBS_INIT}
-        )
-endif ()
--- a/examples/command/README.md
+++ b/examples/command/README.md
@ -1,30 +0,0 @@
-# command
-
-This is a basic Voice Assistant example that accepts voice commands from the microphone.
-More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/issues/171).
-
-```bash
-# Run with default arguments and small model
-./command -m ./models/ggml-small.en.bin -t 8
-
-# On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
-./command -m ./models/ggml-tiny.en.bin -ac 768 -t 4 -c 0
-```
-
-https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
-
-Web version: [examples/command.wasm](/examples/command.wasm)
-
-## Building
-
-The `command` tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:
-
-```bash
-# Install SDL2 on Linux
-sudo apt-get install libsdl2-dev
-
-# Install SDL2 on Mac OS
-brew install sdl2
-
-make command
-```
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@ -1,654 +0,0 @@
-// Voice assistant example
-//
-// Speak short text commands to the microphone.
-// This program will detect your voice command and convert them to text.
-//
-// ref: https://github.com/ggerganov/whisper.cpp/issues/171
-//
-
-#include "whisper.h"
-
-#include <SDL.h>
-#include <SDL_audio.h>
-
-#include <cassert>
-#include <cstdio>
-#include <fstream>
-#include <mutex>
-#include <regex>
-#include <string>
-#include <thread>
-#include <vector>
-
-// command-line parameters
-struct whisper_params {
-    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t prompt_ms  = 5000;
-    int32_t command_ms = 4000;
-    int32_t capture_id = -1;
-    int32_t max_tokens = 32;
-    int32_t audio_ctx  = 0;
-
-    float vad_thold    = 0.6f;
-    float freq_thold   = 100.0f;
-
-    bool speed_up      = false;
-    bool translate     = false;
-    bool print_special = false;
-    bool print_energy  = false;
-    bool no_timestamps = true;
-
-    std::string language  = "en";
-    std::string model     = "models/ggml-base.en.bin";
-    std::string fname_out = "";
-};
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
-
-bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
-    for (int i = 1; i < argc; i++) {
-        std::string arg = argv[i];
-
-        if (arg == "-h" || arg == "--help") {
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-        else if (arg == "-t"   || arg == "--threads")       { params.n_threads     = std::stoi(argv[++i]); }
-        else if (arg == "-pms" || arg == "--prompt-ms")     { params.prompt_ms     = std::stoi(argv[++i]); }
-        else if (arg == "-cms" || arg == "--command-ms")    { params.command_ms    = std::stoi(argv[++i]); }
-        else if (arg == "-c"   || arg == "--capture")       { params.capture_id    = std::stoi(argv[++i]); }
-        else if (arg == "-mt"  || arg == "--max-tokens")    { params.max_tokens    = std::stoi(argv[++i]); }
-        else if (arg == "-ac"  || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(argv[++i]); }
-        else if (arg == "-vth" || arg == "--vad-thold")     { params.vad_thold     = std::stof(argv[++i]); }
-        else if (arg == "-fth" || arg == "--freq-thold")    { params.freq_thold    = std::stof(argv[++i]); }
-        else if (arg == "-su"  || arg == "--speed-up")      { params.speed_up      = true; }
-        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
-        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
-        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
-        else if (arg == "-l"   || arg == "--language")      { params.language      = argv[++i]; }
-        else if (arg == "-m"   || arg == "--model")         { params.model         = argv[++i]; }
-        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = argv[++i]; }
-        else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-    }
-
-    return true;
-}
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options]\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help          [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N     [%-7d] number of threads to use during computation\n", params.n_threads);
-    fprintf(stderr, "  -pms N,   --prompt-ms N   [%-7d] prompt duration in milliseconds\n",             params.prompt_ms);
-    fprintf(stderr, "  -cms N,   --command-ms N  [%-7d] command duration in milliseconds\n",            params.command_ms);
-    fprintf(stderr, "  -c ID,    --capture ID    [%-7d] capture device ID\n",                           params.capture_id);
-    fprintf(stderr, "  -mt N,    --max-tokens N  [%-7d] maximum number of tokens per audio chunk\n",    params.max_tokens);
-    fprintf(stderr, "  -ac N,    --audio-ctx N   [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
-    fprintf(stderr, "  -vth N,   --vad-thold N   [%-7.2f] voice activity detection threshold\n",        params.vad_thold);
-    fprintf(stderr, "  -fth N,   --freq-thold N  [%-7.2f] high-pass frequency cutoff\n",                params.freq_thold);
-    fprintf(stderr, "  -su,      --speed-up      [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
-    fprintf(stderr, "  -tr,      --translate     [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
-    fprintf(stderr, "  -ps,      --print-special [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
-    fprintf(stderr, "  -pe,      --print-energy  [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
-    fprintf(stderr, "  -l LANG,  --language LANG [%-7s] spoken language\n",                             params.language.c_str());
-    fprintf(stderr, "  -m FNAME, --model FNAME   [%-7s] model path\n",                                  params.model.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] text output file name\n",                       params.fname_out.c_str());
-    fprintf(stderr, "\n");
-}
-
-//
-// SDL Audio capture
-//
-
-class audio_async {
-public:
-    audio_async(int len_ms);
-    ~audio_async();
-
-    bool init(int capture_id, int sample_rate);
-
-    // start capturing audio via the provided SDL callback
-    // keep last len_ms seconds of audio in a circular buffer
-    bool resume();
-    bool pause();
-    bool clear();
-
-    // callback to be called by SDL
-    void callback(uint8_t * stream, int len);
-
-    // get audio data from the circular buffer
-    void get(int ms, std::vector<float> & audio);
-
-private:
-    SDL_AudioDeviceID m_dev_id_in = 0;
-
-    int m_len_ms = 0;
-    int m_sample_rate = 0;
-
-    bool       m_running = false;
-    std::mutex m_mutex;
-
-    std::vector<float> m_audio;
-    std::vector<float> m_audio_new;
-    size_t             m_audio_pos = 0;
-    size_t             m_audio_len = 0;
-};
-
-audio_async::audio_async(int len_ms) {
-    m_len_ms = len_ms;
-}
-
-audio_async::~audio_async() {
-    if (m_dev_id_in) {
-        SDL_CloseAudioDevice(m_dev_id_in);
-    }
-}
-
-bool audio_async::init(int capture_id, int sample_rate) {
-    SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
-
-    if (SDL_Init(SDL_INIT_AUDIO) < 0) {
-        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
-        return false;
-    }
-
-    SDL_SetHintWithPriority(SDL_HINT_AUDIO_RESAMPLING_MODE, "medium", SDL_HINT_OVERRIDE);
-
-    {
-        int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
-        fprintf(stderr, "%s: found %d capture devices:\n", __func__, nDevices);
-        for (int i = 0; i < nDevices; i++) {
-            fprintf(stderr, "%s:    - Capture device #%d: '%s'\n", __func__, i, SDL_GetAudioDeviceName(i, SDL_TRUE));
-        }
-    }
-
-    SDL_AudioSpec capture_spec_requested;
-    SDL_AudioSpec capture_spec_obtained;
-
-    SDL_zero(capture_spec_requested);
-    SDL_zero(capture_spec_obtained);
-
-    capture_spec_requested.freq     = sample_rate;
-    capture_spec_requested.format   = AUDIO_F32;
-    capture_spec_requested.channels = 1;
-    capture_spec_requested.samples  = 1024;
-    capture_spec_requested.callback = [](void * userdata, uint8_t * stream, int len) {
-        audio_async * audio = (audio_async *) userdata;
-        audio->callback(stream, len);
-    };
-    capture_spec_requested.userdata = this;
-
-    if (capture_id >= 0) {
-        fprintf(stderr, "%s: attempt to open capture device %d : '%s' ...\n", __func__, capture_id, SDL_GetAudioDeviceName(capture_id, SDL_TRUE));
-        m_dev_id_in = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(capture_id, SDL_TRUE), SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
-    } else {
-        fprintf(stderr, "%s: attempt to open default capture device ...\n", __func__);
-        m_dev_id_in = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
-    }
-
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: couldn't open an audio device for capture: %s!\n", __func__, SDL_GetError());
-        m_dev_id_in = 0;
-
-        return false;
-    } else {
-        fprintf(stderr, "%s: obtained spec for input device (SDL Id = %d):\n", __func__, m_dev_id_in);
-        fprintf(stderr, "%s:     - sample rate:       %d\n",                   __func__, capture_spec_obtained.freq);
-        fprintf(stderr, "%s:     - format:            %d (required: %d)\n",    __func__, capture_spec_obtained.format,
-                capture_spec_requested.format);
-        fprintf(stderr, "%s:     - channels:          %d (required: %d)\n",    __func__, capture_spec_obtained.channels,
-                capture_spec_requested.channels);
-        fprintf(stderr, "%s:     - samples per frame: %d\n",                   __func__, capture_spec_obtained.samples);
-    }
-
-    m_sample_rate = capture_spec_obtained.freq;
-
-    m_audio.resize((m_sample_rate*m_len_ms)/1000);
-
-    return true;
-}
-
-bool audio_async::resume() {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to resume!\n", __func__);
-        return false;
-    }
-
-    if (m_running) {
-        fprintf(stderr, "%s: already running!\n", __func__);
-        return false;
-    }
-
-    SDL_PauseAudioDevice(m_dev_id_in, 0);
-
-    m_running = true;
-
-    return true;
-}
-
-bool audio_async::pause() {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to pause!\n", __func__);
-        return false;
-    }
-
-    if (!m_running) {
-        fprintf(stderr, "%s: already paused!\n", __func__);
-        return false;
-    }
-
-    SDL_PauseAudioDevice(m_dev_id_in, 1);
-
-    m_running = false;
-
-    return true;
-}
-
-bool audio_async::clear() {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to clear!\n", __func__);
-        return false;
-    }
-
-    if (!m_running) {
-        fprintf(stderr, "%s: not running!\n", __func__);
-        return false;
-    }
-
-    {
-        std::lock_guard<std::mutex> lock(m_mutex);
-
-        m_audio_pos = 0;
-        m_audio_len = 0;
-    }
-
-    return true;
-}
-
-// callback to be called by SDL
-void audio_async::callback(uint8_t * stream, int len) {
-    if (!m_running) {
-        return;
-    }
-
-    const size_t n_samples = len / sizeof(float);
-
-    m_audio_new.resize(n_samples);
-    memcpy(m_audio_new.data(), stream, n_samples * sizeof(float));
-
-    //fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);
-
-    {
-        std::lock_guard<std::mutex> lock(m_mutex);
-
-        if (m_audio_pos + n_samples > m_audio.size()) {
-            const size_t n0 = m_audio.size() - m_audio_pos;
-
-            memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
-            memcpy(&m_audio[0], &stream[n0], (n_samples - n0) * sizeof(float));
-
-            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
-            m_audio_len = m_audio.size();
-        } else {
-            memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
-
-            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
-            m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
-        }
-    }
-}
-
-void audio_async::get(int ms, std::vector<float> & result) {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to get audio from!\n", __func__);
-        return;
-    }
-
-    if (!m_running) {
-        fprintf(stderr, "%s: not running!\n", __func__);
-        return;
-    }
-
-    result.clear();
-
-    {
-        std::lock_guard<std::mutex> lock(m_mutex);
-
-        if (ms <= 0) {
-            ms = m_len_ms;
-        }
-
-        size_t n_samples = (m_sample_rate * ms) / 1000;
-        if (n_samples > m_audio_len) {
-            n_samples = m_audio_len;
-        }
-
-        result.resize(n_samples);
-
-        int s0 = m_audio_pos - n_samples;
-        if (s0 < 0) {
-            s0 += m_audio.size();
-        }
-
-        if (s0 + n_samples > m_audio.size()) {
-            const size_t n0 = m_audio.size() - s0;
-
-            memcpy(result.data(), &m_audio[s0], n0 * sizeof(float));
-            memcpy(&result[n0], &m_audio[0], (n_samples - n0) * sizeof(float));
-        } else {
-            memcpy(result.data(), &m_audio[s0], n_samples * sizeof(float));
-        }
-    }
-}
-
-///////////////////////////
-
-std::string trim(const std::string & s) {
-    std::regex e("^\\s+|\\s+$");
-    return std::regex_replace(s, e, "");
-}
-
-void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
-    const float rc = 1.0f / (2.0f * M_PI * cutoff);
-    const float dt = 1.0f / sample_rate;
-    const float alpha = dt / (rc + dt);
-
-    float y = data[0];
-
-    for (size_t i = 1; i < data.size(); i++) {
-        y = alpha * (y + data[i] - data[i - 1]);
-        data[i] = y;
-    }
-}
-
-bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
-    const int n_samples      = pcmf32.size();
-    const int n_samples_last = (sample_rate * last_ms) / 1000;
-
-    if (n_samples_last >= n_samples) {
-        // not enough samples - assume no speech
-        return false;
-    }
-
-    if (freq_thold > 0.0f) {
-        high_pass_filter(pcmf32, freq_thold, sample_rate);
-    }
-
-    float energy_all  = 0.0f;
-    float energy_last = 0.0f;
-
-    for (size_t i = 0; i < n_samples; i++) {
-        energy_all += fabsf(pcmf32[i]);
-
-        if (i >= n_samples - n_samples_last) {
-            energy_last += fabsf(pcmf32[i]);
-        }
-    }
-
-    energy_all  /= n_samples;
-    energy_last /= n_samples_last;
-
-    if (verbose) {
-        fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
-    }
-
-    if (energy_last > vad_thold*energy_all) {
-        return false;
-    }
-
-    return true;
-}
-
-std::string transcribe(whisper_context * ctx, const whisper_params & params, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
-    const auto t_start = std::chrono::high_resolution_clock::now();
-
-    prob = 0.0f;
-    t_ms = 0;
-
-    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-    wparams.print_progress   = false;
-    wparams.print_special    = params.print_special;
-    wparams.print_realtime   = false;
-    wparams.print_timestamps = !params.no_timestamps;
-    wparams.translate        = params.translate;
-    wparams.no_context       = true;
-    wparams.single_segment   = true;
-    wparams.max_tokens       = params.max_tokens;
-    wparams.language         = params.language.c_str();
-    wparams.n_threads        = params.n_threads;
-
-    wparams.audio_ctx        = params.audio_ctx;
-    wparams.speed_up         = params.speed_up;
-
-    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
-        return "";
-    }
-
-    int prob_n = 0;
-    std::string result;
-
-    const int n_segments = whisper_full_n_segments(ctx);
-    for (int i = 0; i < n_segments; ++i) {
-        const char * text = whisper_full_get_segment_text(ctx, i);
-
-        result += text;
-
-        const int n_tokens = whisper_full_n_tokens(ctx, i);
-        for (int j = 0; j < n_tokens; ++j) {
-            const auto token = whisper_full_get_token_data(ctx, i, j);
-
-            prob += token.p;
-            ++prob_n;
-        }
-    }
-
-    if (prob_n > 0) {
-        prob /= prob_n;
-    }
-
-    const auto t_end = std::chrono::high_resolution_clock::now();
-    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
-
-    return result;
-}
-
-// compute similarity between two strings using Levenshtein distance
-float similarity(const std::string & s0, const std::string & s1) {
-    const size_t len0 = s0.size() + 1;
-    const size_t len1 = s1.size() + 1;
-
-    std::vector<int> col(len1, 0);
-    std::vector<int> prevCol(len1, 0);
-
-    for (size_t i = 0; i < len1; i++) {
-        prevCol[i] = i;
-    }
-
-    for (size_t i = 0; i < len0; i++) {
-        col[0] = i;
-        for (size_t j = 1; j < len1; j++) {
-            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
-        }
-        col.swap(prevCol);
-    }
-
-    const float dist = prevCol[len1 - 1];
-
-    return 1.0f - (dist / std::max(s0.size(), s1.size()));
-}
-
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (whisper_lang_id(params.language.c_str()) == -1) {
-        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
-        whisper_print_usage(argc, argv, params);
-        exit(0);
-    }
-
-    // whisper init
-
-    struct whisper_context * ctx = whisper_init(params.model.c_str());
-
-    // print some info about the processing
-    {
-        fprintf(stderr, "\n");
-        if (!whisper_is_multilingual(ctx)) {
-            if (params.language != "en" || params.translate) {
-                params.language = "en";
-                params.translate = false;
-                fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
-            }
-        }
-        fprintf(stderr, "%s: processing, %d threads, lang = %s, task = %s, timestamps = %d ...\n",
-                __func__,
-                params.n_threads,
-                params.language.c_str(),
-                params.translate ? "translate" : "transcribe",
-                params.no_timestamps ? 0 : 1);
-
-        fprintf(stderr, "\n");
-    }
-
-
-    // init audio
-
-    audio_async audio(30*1000);
-    if (!audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
-        fprintf(stderr, "%s: audio.init() failed!\n", __func__);
-        return 1;
-    }
-
-    audio.resume();
-
-    bool is_running  = true;
-    bool have_prompt = false;
-    bool ask_prompt  = true;
-
-    float prob0 = 0.0f;
-    float prob  = 0.0f;
-
-    std::vector<float> pcmf32_cur;
-    std::vector<float> pcmf32_prompt;
-
-    const std::string k_prompt = "Ok Whisper, start listening for commands.";
-
-    // main loop
-    while (is_running) {
-        // handle Ctrl + C
-        {
-            SDL_Event event;
-            while (SDL_PollEvent(&event)) {
-                switch (event.type) {
-                    case SDL_QUIT:
-                        {
-                            is_running = false;
-                        } break;
-                    default:
-                        break;
-                }
-            }
-
-            if (!is_running) {
-                break;
-            }
-        }
-
-        // delay
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-        if (ask_prompt) {
-            fprintf(stdout, "\n");
-            fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
-            fprintf(stdout, "\n");
-
-            ask_prompt = false;
-        }
-
-        int64_t t_ms = 0;
-
-        {
-            audio.get(2000, pcmf32_cur);
-
-            if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
-                fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
-
-                if (!have_prompt) {
-                    audio.get(params.prompt_ms, pcmf32_cur);
-
-                    const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob0, t_ms));
-
-                    fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
-
-                    const float sim = similarity(txt, k_prompt);
-
-                    if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
-                        fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
-                        ask_prompt = true;
-                    } else {
-                        fprintf(stdout, "\n");
-                        fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
-                        fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
-                        fprintf(stdout, "\n");
-
-                        // save the audio for the prompt
-                        pcmf32_prompt = pcmf32_cur;
-                        have_prompt = true;
-                    }
-                } else {
-                    audio.get(params.command_ms, pcmf32_cur);
-
-                    // prepend the prompt audio
-                    pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
-
-                    const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
-
-                    prob = 100.0f*(prob - prob0);
-
-                    //fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
-
-                    // find the prompt in the text
-                    float best_sim = 0.0f;
-                    size_t best_len = 0;
-                    for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
-                        const auto prompt = txt.substr(0, n);
-
-                        const float sim = similarity(prompt, k_prompt);
-
-                        //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
-
-                        if (sim > best_sim) {
-                            best_sim = sim;
-                            best_len = n;
-                        }
-                    }
-
-                    const std::string command = ::trim(txt.substr(best_len));
-
-                    fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
-                    fprintf(stdout, "\n");
-                }
-
-                audio.clear();
-            }
-        }
-    }
-
-    audio.pause();
-
-    whisper_print_timings(ctx);
-    whisper_free(ctx);
-
-    return 0;
-}
--- a/examples/generate-karaoke.sh
+++ b/examples/generate-karaoke.sh
@ -1,60 +0,0 @@
-#!/bin/bash
-
-# Simple tool to record audio from the microphone and generate a karaoke video
-# Usage:
-#
-#  cd whisper.cpp
-#  make
-#
-#  ./examples/generate-karaoke.sh [model] [step_ms]
-#
-# Press Ctrl+C to stop recording
-#
-
-executable="./main"
-model="base.en"
-model_path="models/ggml-$model.bin"
-
-# require sox and ffmpeg to be installed
-if ! command -v sox &> /dev/null
-then
-    echo "sox could not be found"
-    exit 1
-fi
-
-if ! command -v ffmpeg &> /dev/null
-then
-    echo "ffmpeg could not be found"
-    exit 2
-fi
-
-if [ ! -f "$executable" ]; then
-    echo "'$executable' does not exist. Please build it first."
-    exit 3
-fi
-
-if [ ! -f "$model_path" ]; then
-    echo "'$model_path' does not exist. Please download it first."
-    exit 4
-fi
-
-# record some raw audio
-sox -d rec.wav
-
-# resample to 16kHz
-ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>&1
-
-# run Whisper
-echo "Processing ..."
-./main -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1
-
-# generate Karaoke video
-echo "Generating video ..."
-source rec16.wav.wts > /dev/null 2>&1
-
-# play the video
-echo "Playing ./rec16.wav.mp4 ..."
-ffplay -loglevel 0 -autoexit ./rec16.wav.mp4
-
-echo "Done"
-exit 0
--- a/examples/helpers.js
+++ b/examples/helpers.js
@ -1,182 +0,0 @@
-// Common Javascript functions used by the examples
-
-function convertTypedArray(src, type) {
-    var buffer = new ArrayBuffer(src.byteLength);
-    var baseView = new src.constructor(buffer).set(src);
-    return new type(buffer);
-}
-
-var printTextarea = (function() {
-    var element = document.getElementById('output');
-    if (element) element.alue = ''; // clear browser cache
-    return function(text) {
-        if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
-        console.log(text);
-        if (element) {
-            element.value += text + "\n";
-            element.scrollTop = element.scrollHeight; // focus on bottom
-        }
-    };
-})();
-
-async function clearCache() {
-    if (confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.')) {
-        indexedDB.deleteDatabase(dbName);
-    }
-}
-
-// fetch a remote file from remote URL using the Fetch API
-async function fetchRemote(url, cbProgress, cbPrint) {
-    cbPrint('fetchRemote: downloading with fetch()...');
-
-    const response = await fetch(
-        url,
-        {
-            method: 'GET',
-            headers: {
-                'Content-Type': 'application/octet-stream',
-            },
-        }
-    );
-
-    if (!response.ok) {
-        cbPrint('fetchRemote: failed to fetch ' + url);
-        return;
-    }
-
-    const contentLength = response.headers.get('content-length');
-    const total = parseInt(contentLength, 10);
-    const reader = response.body.getReader();
-
-    var chunks = [];
-    var receivedLength = 0;
-    var progressLast = -1;
-
-    while (true) {
-        const { done, value } = await reader.read();
-
-        if (done) {
-            break;
-        }
-
-        chunks.push(value);
-        receivedLength += value.length;
-
-        if (contentLength) {
-            cbProgress(receivedLength/total);
-
-            var progressCur = Math.round((receivedLength / total) * 10);
-            if (progressCur != progressLast) {
-                cbPrint('fetchRemote: fetching ' + 10*progressCur + '% ...');
-                progressLast = progressCur;
-            }
-        }
-    }
-
-    var position = 0;
-    var chunksAll = new Uint8Array(receivedLength);
-
-    for (var chunk of chunks) {
-        chunksAll.set(chunk, position);
-        position += chunk.length;
-    }
-
-    return chunksAll;
-}
-
-// load remote data
-// - check if the data is already in the IndexedDB
-// - if not, fetch it from the remote URL and store it in the IndexedDB
-function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
-    // query the storage quota and print it
-    navigator.storage.estimate().then(function (estimate) {
-        cbPrint('loadRemote: storage quota: ' + estimate.quota + ' bytes');
-        cbPrint('loadRemote: storage usage: ' + estimate.usage + ' bytes');
-    });
-
-    // check if the data is already in the IndexedDB
-    var rq = indexedDB.open(dbName, dbVersion);
-
-    rq.onupgradeneeded = function (event) {
-        var db = event.target.result;
-        if (db.version == 1) {
-            var os = db.createObjectStore('models', { autoIncrement: false });
-            cbPrint('loadRemote: created IndexedDB ' + db.name + ' version ' + db.version);
-        } else {
-            // clear the database
-            var os = event.currentTarget.transaction.objectStore('models');
-            os.clear();
-            cbPrint('loadRemote: cleared IndexedDB ' + db.name + ' version ' + db.version);
-        }
-    };
-
-    rq.onsuccess = function (event) {
-        var db = event.target.result;
-        var tx = db.transaction(['models'], 'readonly');
-        var os = tx.objectStore('models');
-        var rq = os.get(url);
-
-        rq.onsuccess = function (event) {
-            if (rq.result) {
-                cbPrint('loadRemote: "' + url + '" is already in the IndexedDB');
-                cbReady(dst, rq.result);
-            } else {
-                // data is not in the IndexedDB
-                cbPrint('loadRemote: "' + url + '" is not in the IndexedDB');
-
-                // alert and ask the user to confirm
-                if (!confirm(
-                    'You are about to download ' + size_mb + ' MB of data.\n' +
-                    'The model data will be cached in the browser for future use.\n\n' +
-                    'Press OK to continue.')) {
-                    cbCancel();
-                    return;
-                }
-
-                fetchRemote(url, cbProgress, cbPrint).then(function (data) {
-                    if (data) {
-                        // store the data in the IndexedDB
-                        var rq = indexedDB.open(dbName, dbVersion);
-                        rq.onsuccess = function (event) {
-                            var db = event.target.result;
-                            var tx = db.transaction(['models'], 'readwrite');
-                            var os = tx.objectStore('models');
-                            var rq = os.put(data, url);
-
-                            rq.onsuccess = function (event) {
-                                cbPrint('loadRemote: "' + url + '" stored in the IndexedDB');
-                                cbReady(dst, data);
-                            };
-
-                            rq.onerror = function (event) {
-                                cbPrint('loadRemote: failed to store "' + url + '" in the IndexedDB');
-                                cbCancel();
-                            };
-                        };
-                    }
-                });
-            }
-        };
-
-        rq.onerror = function (event) {
-            cbPrint('loadRemote: failed to get data from the IndexedDB');
-            cbCancel();
-        };
-    };
-
-    rq.onerror = function (event) {
-        cbPrint('loadRemote: failed to open IndexedDB');
-        cbCancel();
-    };
-
-    rq.onblocked = function (event) {
-        cbPrint('loadRemote: failed to open IndexedDB: blocked');
-        cbCancel();
-    };
-
-    rq.onabort = function (event) {
-        cbPrint('loadRemote: failed to open IndexedDB: abort');
-
-    };
-}
-
--- a/examples/livestream.sh
+++ b/examples/livestream.sh
@ -1,112 +0,0 @@
-#!/bin/bash
-#
-# Transcribe audio livestream by feeding ffmpeg output to whisper.cpp at regular intervals
-# Idea by @semiformal-net
-# ref: https://github.com/ggerganov/whisper.cpp/issues/185
-#
-
-set -eo pipefail
-
-url="http://a.files.bbci.co.uk/media/live/manifesto/audio/simulcast/hls/nonuk/sbr_low/ak/bbc_world_service.m3u8"
-fmt=aac # the audio format extension of the stream (TODO: auto detect)
-step_s=30
-model="base.en"
-
-check_requirements()
-{
-    if ! command -v ./main &>/dev/null; then
-        echo "whisper.cpp main executable is required (make)"
-        exit 1
-    fi
-
-    if ! command -v ffmpeg &>/dev/null; then
-        echo "ffmpeg is required (https://ffmpeg.org)"
-        exit 1
-    fi
-}
-
-check_requirements
-
-
-if [ -z "$1" ]; then
-    echo "Usage: $0 stream_url [step_s] [model]"
-    echo ""
-    echo "  Example:"
-    echo "    $0 $url $step_s $model"
-    echo ""
-    echo "No url specified, using default: $url"
-else
-    url="$1"
-fi
-
-if [ -n "$2" ]; then
-    step_s="$2"
-fi
-
-if [ -n "$3" ]; then
-    model="$3"
-fi
-
-# Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
-
-# list available models
-function list_models {
-    printf "\n"
-    printf "  Available models:"
-    for model in "${models[@]}"; do
-        printf " $model"
-    done
-    printf "\n\n"
-}
-
-if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
-    printf "Invalid model: $model\n"
-    list_models
-
-    exit 1
-fi
-
-running=1
-
-trap "running=0" SIGINT SIGTERM
-
-printf "[+] Transcribing stream with model '$model', step_s $step_s (press Ctrl+C to stop):\n\n"
-
-# continuous stream in native fmt (this file will grow forever!)
-ffmpeg -loglevel quiet -y -re -probesize 32 -i $url -c copy /tmp/whisper-live0.${fmt} &
-if [ $? -ne 0 ]; then
-    printf "Error: ffmpeg failed to capture audio stream\n"
-    exit 1
-fi
-
-printf "Buffering audio. Please wait...\n\n"
-sleep $(($step_s))
-
-# do not stop script on error
-set +e
-
-i=0
-SECONDS=0
-while [ $running -eq 1 ]; do
-    # extract the next piece from the main file above and transcode to wav. -ss sets start time and nudges it by -0.5s to catch missing words (??)
-    err=1
-    while [ $err -ne 0 ]; do
-        if [ $i -gt 0 ]; then
-            ffmpeg -loglevel quiet -v error -noaccurate_seek -i /tmp/whisper-live0.${fmt} -y -ar 16000 -ac 1 -c:a pcm_s16le -ss $(($i*$step_s-1)).5 -t $step_s /tmp/whisper-live.wav 2> /tmp/whisper-live.err
-        else
-            ffmpeg -loglevel quiet -v error -noaccurate_seek -i /tmp/whisper-live0.${fmt} -y -ar 16000 -ac 1 -c:a pcm_s16le -ss $(($i*$step_s)) -t $step_s /tmp/whisper-live.wav 2> /tmp/whisper-live.err
-        fi
-        err=$(cat /tmp/whisper-live.err | wc -l)
-    done
-
-    ./main -t 8 -m ./models/ggml-base.en.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
-
-    while [ $SECONDS -lt $((($i+1)*$step_s)) ]; do
-        sleep 1
-    done
-    ((i=i+1))
-done
-
-killall -v ffmpeg
-killall -v main
--- a/examples/main/CMakeLists.txt
+++ b/examples/main/CMakeLists.txt
@ -1,3 +0,0 @@
-# `main` example: one executable built from main.cpp, linked privately
-# against the whisper library and ${CMAKE_THREAD_LIBS_INIT}
-set(TARGET main)
-
-add_executable(${TARGET}
-    main.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    ${CMAKE_THREAD_LIBS_INIT}
-    )
--- a/examples/main/README.md
+++ b/examples/main/README.md
@ -1,33 +0,0 @@
-# main
-
-This is the main example demonstrating most of the functionality of the Whisper model.
-It can be used as a reference for using the `whisper.cpp` library in other projects.
-
-```
-./main -h
-
-usage: ./main [options] file0.wav file1.wav ...
-
-options:
-  -h,       --help          [default] show this help message and exit
-  -t N,     --threads N     [4      ] number of threads to use during computation
-  -p N,     --processors N  [1      ] number of processors to use during computation
-  -ot N,    --offset-t N    [0      ] time offset in milliseconds
-  -on N,    --offset-n N    [0      ] segment index offset
-  -d  N,    --duration N    [0      ] duration of audio to process in milliseconds
-  -mc N,    --max-context N [-1     ] maximum number of text context tokens to store
-  -ml N,    --max-len N     [0      ] maximum segment length in characters
-  -wt N,    --word-thold N  [0.01   ] word timestamp probability threshold
-  -su,      --speed-up      [false  ] speed up audio by x2 (reduced accuracy)
-  -tr,      --translate     [false  ] translate from source language to english
-  -otxt,    --output-txt    [false  ] output result in a text file
-  -ovtt,    --output-vtt    [false  ] output result in a vtt file
-  -osrt,    --output-srt    [false  ] output result in a srt file
-  -owts,    --output-words  [false  ] output script for generating karaoke video
-  -ps,      --print-special [false  ] print special tokens
-  -pc,      --print-colors  [false  ] print colors
-  -nt,      --no-timestamps [false  ] do not print timestamps
-  -l LANG,  --language LANG [en     ] spoken language
-  -m FNAME, --model FNAME   [models/ggml-base.en.bin] model path
-  -f FNAME, --file FNAME    [       ] input WAV file path
-```
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -1,663 +0,0 @@
-#include "whisper.h"
-
-// third-party utilities
-// use your favorite implementations
-#define DR_WAV_IMPLEMENTATION
-#include "dr_wav.h"
-
-#include <cmath>
-#include <fstream>
-#include <cstdio>
-#include <string>
-#include <thread>
-#include <vector>
-
-// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
-// Lowest is red, middle is yellow, highest is green.
-// Each entry is an ANSI 256-color foreground escape sequence ("\033[38;5;<n>m").
-// whisper_print_segment_callback selects an entry from the token probability p
-// (index derived from pow(p, 3) * k_colors.size()) when colors are enabled.
-const std::vector<std::string> k_colors = {
-    "\033[38;5;196m", "\033[38;5;202m", "\033[38;5;208m", "\033[38;5;214m", "\033[38;5;220m",
-    "\033[38;5;226m", "\033[38;5;190m", "\033[38;5;154m", "\033[38;5;118m", "\033[38;5;82m",
-};
-
-// Format a time given in units of 10 ms as "hh:mm:ss.mmm".
-// With comma == true the millisecond separator is ',' (as used by SRT) instead of '.'.
-//  500 -> 00:00:05.000
-// 6000 -> 00:01:00.000
-std::string to_timestamp(int64_t t, bool comma = false) {
-    const int64_t ms_per_sec  = 1000;
-    const int64_t ms_per_min  = 60 * ms_per_sec;
-    const int64_t ms_per_hour = 60 * ms_per_min;
-
-    int64_t rest = t * 10; // 10 ms units -> milliseconds
-
-    const int hours = (int) (rest / ms_per_hour);
-    rest %= ms_per_hour;
-
-    const int minutes = (int) (rest / ms_per_min);
-    rest %= ms_per_min;
-
-    const int seconds = (int) (rest / ms_per_sec);
-    const int millis  = (int) (rest % ms_per_sec);
-
-    const char * separator = comma ? "," : ".";
-
-    char text[32];
-    snprintf(text, sizeof(text), "%02d:%02d:%02d%s%03d", hours, minutes, seconds, separator, millis);
-
-    return std::string(text);
-}
-
-// Convert a timestamp in units of 10 ms to a sample index at WHISPER_SAMPLE_RATE,
-// clamped to the range [0, n_samples - 1] (upper bound applied first, then the lower one).
-int timestamp_to_sample(int64_t t, int n_samples) {
-    const int index = (int) ((t*WHISPER_SAMPLE_RATE)/100);
-    const int last  = (int) n_samples - 1;
-
-    const int capped = index < last ? index : last;
-
-    return capped > 0 ? capped : 0;
-}
-
-// helper function to replace substrings
-// Replaces every occurrence of `search` in `s` with `replace`, in place.
-// The scan resumes after the inserted text, so a replacement is never rescanned.
-// An empty `search` leaves `s` untouched: it would match at every position and
-// the loop would never terminate.
-void replace_all(std::string & s, const std::string & search, const std::string & replace) {
-    if (search.empty()) {
-        return;
-    }
-
-    size_t pos = 0;
-    while ((pos = s.find(search, pos)) != std::string::npos) {
-        s.replace(pos, search.length(), replace);
-        pos += replace.length();
-    }
-}
-
-// command-line parameters
-// Defaults set here are the values shown by whisper_print_usage.
-struct whisper_params {
-    // hardware_concurrency() may return 0 when the value is not computable,
-    // so the default is kept within [1, 4]
-    int32_t n_threads    = std::max(1, std::min(4, (int32_t) std::thread::hardware_concurrency()));
-    int32_t n_processors = 1;
-    int32_t offset_t_ms  = 0;  // time offset in milliseconds
-    int32_t offset_n     = 0;  // segment index offset
-    int32_t duration_ms  = 0;  // duration of audio to process in milliseconds
-    int32_t max_context  = -1; // negative: keep the library default for the text context size
-    int32_t max_len      = 0;  // maximum segment length in characters
-
-    float word_thold = 0.01f;  // word timestamp probability threshold
-
-    bool speed_up      = false;
-    bool translate     = false;
-    bool diarize       = false;
-    bool output_txt    = false;
-    bool output_vtt    = false;
-    bool output_srt    = false;
-    bool output_wts    = false;
-    bool print_special = false;
-    bool print_colors  = false;
-    bool no_timestamps = false;
-
-    std::string language  = "en";
-    std::string model     = "models/ggml-base.en.bin";
-
-    // input files, in the order given on the command line
-    std::vector<std::string> fname_inp = {};
-};
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
-
-// Parse the command line into `params`.
-// Arguments that do not start with '-' are collected as input files.
-// -h/--help prints the usage and exits; an unknown option prints an error plus the usage and exits.
-// An option that requires a value but is the last argument prints an error plus the usage and exits with status 1.
-// Returns true when the whole command line was consumed.
-bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
-    for (int i = 1; i < argc; i++) {
-        std::string arg = argv[i];
-
-        if (arg[0] != '-') {
-            params.fname_inp.push_back(arg);
-            continue;
-        }
-
-        if (arg == "-h" || arg == "--help") {
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-
-        // returns the value that follows the current option and advances `i`;
-        // without the bounds check a trailing option would read argv[argc] (a null pointer)
-        const auto next_value = [&]() -> const char * {
-            if (i + 1 >= argc) {
-                fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
-                whisper_print_usage(argc, argv, params);
-                exit(1);
-            }
-            return argv[++i];
-        };
-
-        if      (arg == "-t"    || arg == "--threads")       { params.n_threads     = std::stoi(next_value()); }
-        else if (arg == "-p"    || arg == "--processors")    { params.n_processors  = std::stoi(next_value()); }
-        else if (arg == "-ot"   || arg == "--offset-t")      { params.offset_t_ms   = std::stoi(next_value()); }
-        else if (arg == "-on"   || arg == "--offset-n")      { params.offset_n      = std::stoi(next_value()); }
-        else if (arg == "-d"    || arg == "--duration")      { params.duration_ms   = std::stoi(next_value()); }
-        else if (arg == "-mc"   || arg == "--max-context")   { params.max_context   = std::stoi(next_value()); }
-        else if (arg == "-ml"   || arg == "--max-len")       { params.max_len       = std::stoi(next_value()); }
-        else if (arg == "-wt"   || arg == "--word-thold")    { params.word_thold    = std::stof(next_value()); }
-        else if (arg == "-su"   || arg == "--speed-up")      { params.speed_up      = true; }
-        else if (arg == "-tr"   || arg == "--translate")     { params.translate     = true; }
-        else if (arg == "-di"   || arg == "--diarize")       { params.diarize       = true; }
-        else if (arg == "-otxt" || arg == "--output-txt")    { params.output_txt    = true; }
-        else if (arg == "-ovtt" || arg == "--output-vtt")    { params.output_vtt    = true; }
-        else if (arg == "-osrt" || arg == "--output-srt")    { params.output_srt    = true; }
-        else if (arg == "-owts" || arg == "--output-words")  { params.output_wts    = true; }
-        else if (arg == "-ps"   || arg == "--print-special") { params.print_special = true; }
-        else if (arg == "-pc"   || arg == "--print-colors")  { params.print_colors  = true; }
-        else if (arg == "-nt"   || arg == "--no-timestamps") { params.no_timestamps = true; }
-        else if (arg == "-l"    || arg == "--language")      { params.language      = next_value(); }
-        else if (arg == "-m"    || arg == "--model")         { params.model         = next_value(); }
-        else if (arg == "-f"    || arg == "--file")          { params.fname_inp.push_back(next_value()); }
-        else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-    }
-
-    return true;
-}
-
-// Print the command-line help to stderr.
-// The bracketed column shows the current value of each field in `params`,
-// which is the default when called before/while parsing.
-// `argc` is unused; `argv[0]` supplies the program name.
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help          [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N     [%-7d] number of threads to use during computation\n",    params.n_threads);
-    fprintf(stderr, "  -p N,     --processors N  [%-7d] number of processors to use during computation\n", params.n_processors);
-    fprintf(stderr, "  -ot N,    --offset-t N    [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
-    fprintf(stderr, "  -on N,    --offset-n N    [%-7d] segment index offset\n",                           params.offset_n);
-    fprintf(stderr, "  -d  N,    --duration N    [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
-    fprintf(stderr, "  -mc N,    --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
-    fprintf(stderr, "  -ml N,    --max-len N     [%-7d] maximum segment length in characters\n",           params.max_len);
-    fprintf(stderr, "  -wt N,    --word-thold N  [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
-    fprintf(stderr, "  -su,      --speed-up      [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
-    fprintf(stderr, "  -tr,      --translate     [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
-    fprintf(stderr, "  -di,      --diarize       [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
-    fprintf(stderr, "  -otxt,    --output-txt    [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
-    fprintf(stderr, "  -ovtt,    --output-vtt    [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
-    fprintf(stderr, "  -osrt,    --output-srt    [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
-    fprintf(stderr, "  -owts,    --output-words  [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
-    fprintf(stderr, "  -ps,      --print-special [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
-    fprintf(stderr, "  -pc,      --print-colors  [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
-    // shown like every other flag: the value of the field itself (it was printed inverted before)
-    fprintf(stderr, "  -nt,      --no-timestamps [%-7s] do not print timestamps\n",                        params.no_timestamps ? "true" : "false");
-    fprintf(stderr, "  -l LANG,  --language LANG [%-7s] spoken language\n",                                params.language.c_str());
-    fprintf(stderr, "  -m FNAME, --model FNAME   [%-7s] model path\n",                                     params.model.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] input WAV file path\n",                            "");
-    fprintf(stderr, "\n");
-}
-
-// Context handed to whisper_print_segment_callback through its `user_data` pointer.
-// main() fills it with the addresses of its own locals.
-struct whisper_print_user_data {
-    // command-line parameters that control how segments are printed
-    const whisper_params * params;
-
-    // per-channel float PCM read by the diarization branch of the callback;
-    // main() fills it only when diarize is set
-    const std::vector<std::vector<float>> * pcmf32s;
-};
-
-// New-segment callback: prints the last `n_new` segments of `ctx` to stdout.
-//   user_data - pointer to a whisper_print_user_data
-// Without timestamps the segment text is printed as-is (no newline) and stdout is flushed.
-// With timestamps each segment is printed on its own line as "[t0 --> t1]  text",
-// optionally prefixed with a speaker label when diarization is on and two channels are available.
-// With print_colors each token is wrapped in a color chosen from k_colors by its probability.
-void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, void * user_data) {
-    const auto & params  = *((whisper_print_user_data *) user_data)->params;
-    const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
-
-    const int n_segments = whisper_full_n_segments(ctx);
-
-    // print the last n_new segments
-    const int s0 = n_segments - n_new;
-    if (s0 == 0) {
-        printf("\n");
-    }
-
-    for (int i = s0; i < n_segments; i++) {
-        if (params.no_timestamps) {
-            if (params.print_colors) {
-                for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
-                    if (params.print_special == false) {
-                        // token ids from EOT upwards are skipped unless special tokens are requested
-                        const whisper_token id = whisper_full_get_token_id(ctx, i, j);
-                        if (id >= whisper_token_eot(ctx)) {
-                            continue;
-                        }
-                    }
-
-                    const char * text = whisper_full_get_token_text(ctx, i, j);
-                    const float  p    = whisper_full_get_token_p   (ctx, i, j);
-
-                    // clamp to the last valid index: p == 1.0 would otherwise yield k_colors.size()
-                    const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
-
-                    printf("%s%s%s", k_colors[col].c_str(), text, "\033[0m");
-                }
-            } else {
-                const char * text = whisper_full_get_segment_text(ctx, i);
-                printf("%s", text);
-            }
-            fflush(stdout);
-        } else {
-            const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-            const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-            std::string speaker = "";
-
-            if (params.diarize && pcmf32s.size() == 2) {
-                const int64_t n_samples = pcmf32s[0].size();
-
-                const int64_t is0 = timestamp_to_sample(t0, n_samples);
-                const int64_t is1 = timestamp_to_sample(t1, n_samples);
-
-                // sum of absolute sample values per channel over the segment
-                double energy0 = 0.0f;
-                double energy1 = 0.0f;
-
-                for (int64_t j = is0; j < is1; j++) {
-                    energy0 += fabs(pcmf32s[0][j]);
-                    energy1 += fabs(pcmf32s[1][j]);
-                }
-
-                // a channel wins only when it is more than 10% louder than the other one
-                if (energy0 > 1.1*energy1) {
-                    speaker = "(speaker 0)";
-                } else if (energy1 > 1.1*energy0) {
-                    speaker = "(speaker 1)";
-                } else {
-                    speaker = "(speaker ?)";
-                }
-
-                //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
-            }
-
-            if (params.print_colors) {
-                printf("[%s --> %s]  ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
-                for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
-                    if (params.print_special == false) {
-                        const whisper_token id = whisper_full_get_token_id(ctx, i, j);
-                        if (id >= whisper_token_eot(ctx)) {
-                            continue;
-                        }
-                    }
-
-                    const char * text = whisper_full_get_token_text(ctx, i, j);
-                    const float  p    = whisper_full_get_token_p   (ctx, i, j);
-
-                    // clamp to the last valid index: p == 1.0 would otherwise yield k_colors.size()
-                    const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
-
-                    printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
-                }
-                printf("\n");
-            } else {
-                const char * text = whisper_full_get_segment_text(ctx, i);
-
-                printf("[%s --> %s]  %s%s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), speaker.c_str(), text);
-            }
-        }
-    }
-}
-
-// Write the text of every segment in `ctx` to the file `fname`, one segment per line.
-// Returns false when the file cannot be opened for writing, true otherwise.
-bool output_txt(struct whisper_context * ctx, const char * fname) {
-    std::ofstream out(fname);
-
-    if (out.is_open() == false) {
-        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
-        return false;
-    }
-
-    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
-
-    const int count = whisper_full_n_segments(ctx);
-
-    int seg = 0;
-    while (seg < count) {
-        out << whisper_full_get_segment_text(ctx, seg) << "\n";
-        ++seg;
-    }
-
-    return true;
-}
-
-// Write the segments of `ctx` to `fname` in WebVTT form: a "WEBVTT" header,
-// then per segment a "t0 --> t1" line, the text, and an empty line.
-// Returns false when the file cannot be opened for writing, true otherwise.
-bool output_vtt(struct whisper_context * ctx, const char * fname) {
-    std::ofstream out(fname);
-
-    if (out.is_open() == false) {
-        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
-        return false;
-    }
-
-    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
-
-    out << "WEBVTT\n\n";
-
-    const int count = whisper_full_n_segments(ctx);
-
-    int seg = 0;
-    while (seg < count) {
-        const char * body  = whisper_full_get_segment_text(ctx, seg);
-        const int64_t from = whisper_full_get_segment_t0(ctx, seg);
-        const int64_t to   = whisper_full_get_segment_t1(ctx, seg);
-
-        const std::string cue = to_timestamp(from) + " --> " + to_timestamp(to);
-
-        out << cue << "\n";
-        out << body << "\n\n";
-
-        ++seg;
-    }
-
-    return true;
-}
-
-// Write the segments of `ctx` to `fname` in SRT form: per segment a running number
-// (1-based, shifted by params.offset_n), a "t0 --> t1" line with ',' as the
-// millisecond separator, the text, and an empty line.
-// Returns false when the file cannot be opened for writing, true otherwise.
-bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
-    std::ofstream out(fname);
-
-    if (out.is_open() == false) {
-        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
-        return false;
-    }
-
-    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
-
-    const int count = whisper_full_n_segments(ctx);
-
-    int seg = 0;
-    while (seg < count) {
-        const char * body  = whisper_full_get_segment_text(ctx, seg);
-        const int64_t from = whisper_full_get_segment_t0(ctx, seg);
-        const int64_t to   = whisper_full_get_segment_t1(ctx, seg);
-
-        const auto number = seg + 1 + params.offset_n;
-
-        out << number << "\n";
-        out << to_timestamp(from, true) << " --> " << to_timestamp(to, true) << "\n";
-        out << body << "\n\n";
-
-        ++seg;
-    }
-
-    return true;
-}
-
-// karaoke video generation
-// outputs a bash script that uses ffmpeg to generate a video with the subtitles
-//   fname     - path of the script to write
-//   fname_inp - input audio file; the video is written next to it as "<fname_inp>.mp4"
-//   params    - currently unused
-//   t_sec     - duration passed to the ffmpeg color source
-// Returns false when the script cannot be opened for writing (same contract as
-// output_txt / output_vtt / output_srt), true otherwise.
-// TODO: font parameter adjustments
-bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) {
-    std::ofstream fout(fname);
-    if (!fout.is_open()) {
-        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
-        return false;
-    }
-
-    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
-
-    // TODO: become parameter
-    static const char * font = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
-
-    fout << "#!/bin/bash" << "\n";
-    fout << "\n";
-
-    fout << "ffmpeg -i " << fname_inp << " -f lavfi -i color=size=1200x120:duration=" << t_sec << ":rate=25:color=black -vf \"";
-
-    for (int i = 0; i < whisper_full_n_segments(ctx); i++) {
-        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-        const int n = whisper_full_n_tokens(ctx, i);
-
-        std::vector<whisper_token_data> tokens(n);
-        for (int j = 0; j < n; ++j) {
-            tokens[j] = whisper_full_get_token_data(ctx, i, j);
-        }
-
-        // filters of consecutive segments are chained with ','
-        if (i > 0) {
-            fout << ",";
-        }
-
-        // background text
-        fout << "drawtext=fontfile='" << font << "':fontsize=24:fontcolor=gray:x=(w-text_w)/2:y=h/2:text='':enable='between(t," << t0/100.0 << "," << t0/100.0 << ")'";
-
-        bool is_first = true;
-
-        for (int j = 0; j < n; ++j) {
-            const auto & token = tokens[j];
-
-            // token ids from EOT upwards are not rendered
-            if (tokens[j].id >= whisper_token_eot(ctx)) {
-                continue;
-            }
-
-            std::string txt_bg;
-            std::string txt_fg; // highlight token
-            std::string txt_ul; // underline
-
-            txt_bg = "> ";
-            txt_fg = "> ";
-            txt_ul = "\\ \\ ";
-
-            {
-                // rebuild the whole line for token j: the background holds every token,
-                // the foreground/underline hold escaped spaces everywhere except under token j
-                for (int k = 0; k < n; ++k) {
-                    const auto & token2 = tokens[k];
-
-                    if (tokens[k].id >= whisper_token_eot(ctx)) {
-                        continue;
-                    }
-
-                    const std::string txt = whisper_token_to_str(ctx, token2.id);
-
-                    txt_bg += txt;
-
-                    if (k == j) {
-                        for (int l = 0; l < (int) txt.size(); ++l) {
-                            txt_fg += txt[l];
-                            txt_ul += "_";
-                        }
-                        txt_fg += "|";
-                    } else {
-                        for (int l = 0; l < (int) txt.size(); ++l) {
-                            txt_fg += "\\ ";
-                            txt_ul += "\\ ";
-                        }
-                    }
-                }
-
-                // quotes would terminate the drawtext text='...' / the outer "..." of -vf
-                ::replace_all(txt_bg, "'", "\u2019");
-                ::replace_all(txt_bg, "\"", "\\\"");
-                ::replace_all(txt_fg, "'", "\u2019");
-                ::replace_all(txt_fg, "\"", "\\\"");
-            }
-
-            if (is_first) {
-                // background text
-                fout << ",drawtext=fontfile='" << font << "':fontsize=24:fontcolor=gray:x=(w-text_w)/2:y=h/2:text='" << txt_bg << "':enable='between(t," << t0/100.0 << "," << t1/100.0 << ")'";
-                is_first = false;
-            }
-
-            // foreground text
-            fout << ",drawtext=fontfile='" << font << "':fontsize=24:fontcolor=lightgreen:x=(w-text_w)/2+8:y=h/2:text='" << txt_fg << "':enable='between(t," << token.t0/100.0 << "," << token.t1/100.0 << ")'";
-
-            // underline
-            fout << ",drawtext=fontfile='" << font << "':fontsize=24:fontcolor=lightgreen:x=(w-text_w)/2+8:y=h/2+16:text='" << txt_ul << "':enable='between(t," << token.t0/100.0 << "," << token.t1/100.0 << ")'";
-        }
-    }
-
-    fout << "\" -c:v libx264 -pix_fmt yuv420p -y " << fname_inp << ".mp4" << "\n";
-
-    fout << "\n\n";
-    fout << "echo \"Your video has been saved to " << fname_inp << ".mp4\"" << "\n";
-    fout << "\n";
-    fout << "echo \"  ffplay " << fname_inp << ".mp4\"\n";
-    fout << "\n";
-
-    fout.close();
-
-    fprintf(stderr, "%s: run 'source %s' to generate karaoke video\n", __func__, fname);
-
-    return true;
-}
-
-// Entry point: parse the command line, load the model, then for every input file
-// read the WAV data, run the transcription and write the requested output files.
-// Exit codes: 1 argument parsing failed, 2 no input files, 3 model could not be loaded,
-// 4/5 WAV could not be opened (stdin/file), 6 unsupported channel layout,
-// 8 wrong sample rate, 9 wrong bit depth, 10 inference failed, 0 otherwise.
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (params.fname_inp.empty()) {
-        fprintf(stderr, "error: no input files specified\n");
-        whisper_print_usage(argc, argv, params);
-        return 2;
-    }
-
-    if (whisper_lang_id(params.language.c_str()) == -1) {
-        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
-        whisper_print_usage(argc, argv, params);
-        exit(0);
-    }
-
-    // whisper init
-
-    struct whisper_context * ctx = whisper_init(params.model.c_str());
-
-    if (ctx == nullptr) {
-        fprintf(stderr, "error: failed to initialize whisper context\n");
-        return 3;
-    }
-
-    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
-        const auto fname_inp = params.fname_inp[f];
-
-        std::vector<float> pcmf32; // mono-channel F32 PCM
-        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
-
-        // WAV input
-        {
-            drwav wav;
-            std::vector<uint8_t> wav_data; // used for pipe input from stdin
-
-            if (fname_inp == "-") {
-                // read all of stdin into memory
-                {
-                    uint8_t buf[1024];
-                    while (true)
-                    {
-                        const size_t n = fread(buf, 1, sizeof(buf), stdin);
-                        if (n == 0) {
-                            break;
-                        }
-                        wav_data.insert(wav_data.end(), buf, buf + n);
-                    }
-                }
-
-                if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), NULL) == false) {
-                    fprintf(stderr, "error: failed to open WAV file from stdin\n");
-                    return 4;
-                }
-
-                fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
-            }
-            else if (drwav_init_file(&wav, fname_inp.c_str(), NULL) == false) {
-                fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
-                return 5;
-            }
-
-            if (wav.channels != 1 && wav.channels != 2) {
-                fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
-                return 6;
-            }
-
-            if (params.diarize && wav.channels != 2 && params.no_timestamps == false) {
-                fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", argv[0], fname_inp.c_str());
-                return 6;
-            }
-
-            if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
-                fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
-                return 8;
-            }
-
-            if (wav.bitsPerSample != 16) {
-                fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
-                return 9;
-            }
-
-            // number of frames: taken from the header for files, derived from the byte count for stdin
-            const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
-
-            std::vector<int16_t> pcm16;
-            pcm16.resize(n*wav.channels);
-            drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
-            drwav_uninit(&wav);
-
-            // convert to mono, float
-            pcmf32.resize(n);
-            if (wav.channels == 1) {
-                for (int i = 0; i < n; i++) {
-                    pcmf32[i] = float(pcm16[i])/32768.0f;
-                }
-            } else {
-                for (int i = 0; i < n; i++) {
-                    pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
-                }
-            }
-
-            // A mono file can get here with --diarize when --no-timestamps is set (the check
-            // above lets it through); pcm16 then holds only n samples, so the de-interleaving
-            // below must be skipped. The print callback ignores diarization unless
-            // pcmf32s holds exactly two channels.
-            if (params.diarize && wav.channels == 2) {
-                // convert to stereo, float
-                pcmf32s.resize(2);
-
-                pcmf32s[0].resize(n);
-                pcmf32s[1].resize(n);
-                for (int i = 0; i < n; i++) {
-                    pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
-                    pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
-                }
-            }
-        }
-
-        // print system information
-        {
-            fprintf(stderr, "\n");
-            fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                    params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
-        }
-
-        // print some info about the processing
-        {
-            fprintf(stderr, "\n");
-            if (!whisper_is_multilingual(ctx)) {
-                if (params.language != "en" || params.translate) {
-                    params.language = "en";
-                    params.translate = false;
-                    fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
-                }
-            }
-            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d ...\n",
-                    __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
-                    params.n_threads, params.n_processors,
-                    params.language.c_str(),
-                    params.translate ? "translate" : "transcribe",
-                    params.no_timestamps ? 0 : 1);
-
-            fprintf(stderr, "\n");
-        }
-
-
-        // run the inference
-        {
-            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-            wparams.print_realtime   = false;
-            wparams.print_progress   = false;
-            wparams.print_timestamps = !params.no_timestamps;
-            wparams.print_special    = params.print_special;
-            wparams.translate        = params.translate;
-            wparams.language         = params.language.c_str();
-            wparams.n_threads        = params.n_threads;
-            wparams.n_max_text_ctx   = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
-            wparams.offset_ms        = params.offset_t_ms;
-            wparams.duration_ms      = params.duration_ms;
-
-            // token timestamps are needed for the karaoke script and for length-limited segments
-            wparams.token_timestamps = params.output_wts || params.max_len > 0;
-            wparams.thold_pt         = params.word_thold;
-            wparams.max_len          = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
-
-            wparams.speed_up         = params.speed_up;
-
-            whisper_print_user_data user_data = { &params, &pcmf32s };
-
-            // this callback is called on each new segment
-            if (!wparams.print_realtime) {
-                wparams.new_segment_callback           = whisper_print_segment_callback;
-                wparams.new_segment_callback_user_data = &user_data;
-            }
-
-            // example for abort mechanism
-            // in this example, we do not abort the processing, but we could if the flag is set to true
-            // the callback is called before every encoder run - if it returns false, the processing is aborted
-            {
-                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
-
-                wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) {
-                    bool is_aborted = *(bool*)user_data;
-                    return !is_aborted;
-                };
-                wparams.encoder_begin_callback_user_data = &is_aborted;
-            }
-
-            if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
-                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
-                return 10;
-            }
-        }
-
-        // output stuff
-        {
-            printf("\n");
-
-            // output to text file
-            if (params.output_txt) {
-                const auto fname_txt = fname_inp + ".txt";
-                output_txt(ctx, fname_txt.c_str());
-            }
-
-            // output to VTT file
-            if (params.output_vtt) {
-                const auto fname_vtt = fname_inp + ".vtt";
-                output_vtt(ctx, fname_vtt.c_str());
-            }
-
-            // output to SRT file
-            if (params.output_srt) {
-                const auto fname_srt = fname_inp + ".srt";
-                output_srt(ctx, fname_srt.c_str(), params);
-            }
-
-            // output to WTS file
-            if (params.output_wts) {
-                const auto fname_wts = fname_inp + ".wts";
-                output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
-            }
-        }
-    }
-
-    whisper_print_timings(ctx);
-    whisper_free(ctx);
-
-    return 0;
-}
--- a/examples/stream.wasm/CMakeLists.txt
+++ b/examples/stream.wasm/CMakeLists.txt
@ -1,47 +0,0 @@
-#
-# libstream
-#
-# Emscripten module built from emscripten.cpp and linked against whisper.
-#
-
-set(TARGET libstream)
-
-add_executable(${TARGET}
-    emscripten.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    )
-
-unset(EXTRA_FLAGS)
-
-# With WHISPER_WASM_SINGLE_FILE the extra link flag is added and the generated
-# libstream.js is copied to stream.wasm/stream.js after every build of the target.
-if (WHISPER_WASM_SINGLE_FILE)
-    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
-    message(STATUS "Embedding WASM inside stream.js")
-
-    # VERBATIM: arguments reach the copy command unchanged on every platform
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        ${CMAKE_BINARY_DIR}/bin/libstream.js
-        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/stream.wasm/stream.js
-        VERBATIM
-        )
-endif()
-
-set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
-    --bind \
-    -s USE_PTHREADS=1 \
-    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1024MB \
-    -s TOTAL_MEMORY=1024MB \
-    -s FORCE_FILESYSTEM=1 \
-    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
-    ${EXTRA_FLAGS} \
-    ")
-
-#
-# stream.wasm
-#
-# Static page assets: the HTML template and the shared helpers.js are processed
-# with @ONLY substitution into the stream.wasm output directory.
-#
-
-set(TARGET stream.wasm)
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
--- a/examples/stream.wasm/README.md
+++ b/examples/stream.wasm/README.md
@ -1,20 +0,0 @@
-# stream.wasm
-
-Real-time transcription in the browser using WebAssembly
-
-Online demo: https://whisper.ggerganov.com/stream/
-
-## Build instructions
-
-```bash
-# build using Emscripten (v3.1.2)
-git clone https://github.com/ggerganov/whisper.cpp
-cd whisper.cpp
-mkdir build-em && cd build-em
-emcmake cmake ..
-make -j
-
-# copy the produced page to your HTTP path
-cp bin/stream.wasm/*       /path/to/html/
-cp bin/libstream.worker.js /path/to/html/
-```
--- a/examples/stream.wasm/emscripten.cpp
+++ b/examples/stream.wasm/emscripten.cpp
@ -1,213 +0,0 @@
-#include "ggml.h"
-#include "whisper.h"
-
-#include <emscripten.h>
-#include <emscripten/bind.h>
-
-#include <atomic>
-#include <cmath>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <vector>
-
-constexpr int N_THREAD = 8;
-
-std::vector<struct whisper_context *> g_contexts(4, nullptr);
-
-std::mutex g_mutex;
-std::thread g_worker;
-
-std::atomic<bool> g_running(false);
-
-std::string g_status        = "";
-std::string g_status_forced = "";
-std::string g_transcribed   = "";
-
-std::vector<float> g_pcmf32;
-
-// Publish a new worker status string; the write is serialized by g_mutex.
-void stream_set_status(const std::string & status) {
-    const std::lock_guard<std::mutex> guard(g_mutex);
-    g_status = status;
-}
-
-// Worker-thread entry point.
-//
-// Repeatedly takes the audio queued in g_pcmf32, runs whisper_full() on it with
-// the context stored in g_contexts[index] and publishes the text of the last
-// segment through g_transcribed. The loop ends when g_running is cleared or
-// whisper_full() fails; the context is freed on the way out.
-void stream_main(size_t index) {
-    stream_set_status("loading data ...");
-
-    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
-
-    wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
-    wparams.offset_ms        = 0;
-    wparams.translate        = false;
-    wparams.no_context       = true;
-    wparams.single_segment   = true;
-    wparams.print_realtime   = false;
-    wparams.print_progress   = false;
-    wparams.print_timestamps = true;
-    wparams.print_special    = false;
-
-    wparams.max_tokens       = 32;
-    wparams.audio_ctx        = 768; // partial encoder context for better performance
-
-    wparams.language         = "en";
-
-    printf("stream: using %d threads\n", wparams.n_threads);
-
-    std::vector<float> pcmf32;
-
-    // whisper context
-    auto & ctx = g_contexts[index];
-
-    // 5 seconds interval
-    const int64_t window_samples = 5*WHISPER_SAMPLE_RATE;
-
-    while (g_running) {
-        stream_set_status("waiting for audio ...");
-
-        {
-            std::unique_lock<std::mutex> lock(g_mutex);
-
-            // too little audio queued - release the lock and poll again shortly
-            if (g_pcmf32.size() < 1024) {
-                lock.unlock();
-
-                std::this_thread::sleep_for(std::chrono::milliseconds(10));
-
-                continue;
-            }
-
-            // keep at most the most recent window_samples samples, then drain the queue
-            pcmf32 = std::vector<float>(g_pcmf32.end() - std::min((int64_t) g_pcmf32.size(), window_samples), g_pcmf32.end());
-            g_pcmf32.clear();
-        }
-
-        {
-            const auto t_start = std::chrono::high_resolution_clock::now();
-
-            stream_set_status("running whisper ...");
-
-            int ret = whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size());
-            if (ret != 0) {
-                printf("whisper_full() failed: %d\n", ret);
-                break;
-            }
-
-            const auto t_end = std::chrono::high_resolution_clock::now();
-
-            printf("stream: whisper_full() returned %d in %f seconds\n", ret, std::chrono::duration<double>(t_end - t_start).count());
-        }
-
-        {
-            std::string text_heard;
-
-            {
-                // only the last segment is reported; when no segment was produced
-                // there is nothing to read (index -1 would be out of range)
-                const int n_segments = whisper_full_n_segments(ctx);
-                if (n_segments > 0) {
-                    const char * text = whisper_full_get_segment_text(ctx, n_segments - 1);
-
-                    printf("transcribed: %s\n", text);
-
-                    text_heard += text;
-                }
-            }
-
-            {
-                std::lock_guard<std::mutex> lock(g_mutex);
-                g_transcribed = text_heard;
-            }
-        }
-    }
-
-    // release the context so that the slot can be reused by a later init()
-    if (index < g_contexts.size()) {
-        whisper_free(g_contexts[index]);
-        g_contexts[index] = nullptr;
-    }
-}
-
-// JavaScript-facing API of the stream example, exported through embind.
-//
-//   init(path_model)        - load a model into the first free context slot and start
-//                             the worker thread; returns slot index + 1, or 0 on failure
-//   free(index)             - ask the worker to stop (the worker frees its own context)
-//   set_audio(index, audio) - replace the queued audio with the samples in `audio`;
-//                             returns 0 on success, -1 for a bad index, -2 for an unused slot
-//   get_transcribed()       - take (and clear) the latest transcribed text
-//   get_status()            - forced status if one is set, otherwise the worker status
-//   set_status(status)      - set the forced status ("" removes the override)
-EMSCRIPTEN_BINDINGS(stream) {
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        for (size_t i = 0; i < g_contexts.size(); ++i) {
-            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init(path_model.c_str());
-                if (g_contexts[i] != nullptr) {
-                    // Stop and join the previous worker BEFORE raising g_running again.
-                    // Raising the flag first would let a worker that was asked to stop
-                    // keep looping, and the join() below would then never return.
-                    if (g_worker.joinable()) {
-                        g_running = false;
-                        g_worker.join();
-                    }
-                    g_running = true;
-                    g_worker = std::thread([i]() {
-                        stream_main(i);
-                    });
-
-                    // 1-based handle so that 0 can signal failure to the caller
-                    return i + 1;
-                } else {
-                    return (size_t) 0;
-                }
-            }
-        }
-
-        return (size_t) 0;
-    }));
-
-    emscripten::function("free", emscripten::optional_override([](size_t index) {
-        if (g_running) {
-            g_running = false;
-        }
-    }));
-
-    emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
-        // convert the 1-based handle to a slot index; 0 wraps around and is rejected below
-        --index;
-
-        if (index >= g_contexts.size()) {
-            return -1;
-        }
-
-        if (g_contexts[index] == nullptr) {
-            return -2;
-        }
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            const int n = audio["length"].as<int>();
-
-            emscripten::val heap = emscripten::val::module_property("HEAPU8");
-            emscripten::val memory = heap["buffer"];
-
-            g_pcmf32.resize(n);
-
-            // view the vector's storage as a typed array of the same type as `audio`
-            // and let JavaScript copy the samples straight into it
-            emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
-            memoryView.call<void>("set", audio);
-        }
-
-        return 0;
-    }));
-
-    emscripten::function("get_transcribed", emscripten::optional_override([]() {
-        std::string transcribed;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            transcribed = std::move(g_transcribed);
-        }
-
-        return transcribed;
-    }));
-
-    emscripten::function("get_status", emscripten::optional_override([]() {
-        std::string status;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            status = g_status_forced.empty() ? g_status : g_status_forced;
-        }
-
-        return status;
-    }));
-
-    emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            g_status_forced = status;
-        }
-    }));
-}
--- a/examples/stream.wasm/index-tmpl.html
+++ b/examples/stream.wasm/index-tmpl.html
@ -1,386 +0,0 @@
-<!doctype html>
-<html lang="en-us">
-    <head>
-        <title>stream : Real-time Whisper transcription in WebAssembly</title>
-
-        <style>
-            #output {
-                width: 100%;
-                height: 100%;
-                margin: 0 auto;
-                margin-top: 10px;
-                border-left: 0px;
-                border-right: 0px;
-                padding-left: 0px;
-                padding-right: 0px;
-                display: block;
-                background-color: black;
-                color: white;
-                font-size: 10px;
-                font-family: 'Lucida Console', Monaco, monospace;
-                outline: none;
-                white-space: pre;
-                overflow-wrap: normal;
-                overflow-x: scroll;
-            }
-        </style>
-    </head>
-    <body>
-        <div id="main-container">
-            <b>stream : Real-time Whisper transcription in WebAssembly</b>
-
-            <br><br>
-
-            You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/stream.wasm">GitHub</a>.
-
-            <br><br>
-
-            <hr>
-
-            Select the model you would like to use, click the "Start" button and start speaking
-
-            <br><br>
-
-            <div id="model-whisper">
-                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <span id="fetch-whisper-progress"></span>
-
-                <!--
-                    <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-                -->
-            </div>
-
-            <br>
-
-            <div id="input">
-                <button id="start"  onclick="onStart()" disabled>Start</button>
-                <button id="stop"   onclick="onStop()" disabled>Stop</button>
-                <button id="clear"  onclick="clearCache()">Clear Cache</button>
-            </div>
-
-            <br>
-
-            <div id="state">
-                Status: <b><span id="state-status">not started</span></b>
-
-                <pre id="state-transcribed">[The transcribed text will be displayed here]</pre>
-            </div>
-
-            <hr>
-
-            Debug output:
-            <textarea id="output" rows="20"></textarea>
-
-            <br>
-
-            <b>Troubleshooting</b>
-
-            <br><br>
-
-            The page does some heavy computations, so make sure:
-
-            <ul>
-                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
-                <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
-                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
-            </ul>
-
-            <div class="cell-version">
-                <span>
-                    |
-                    Build time: <span class="nav-link">@GIT_DATE@</span> |
-                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
-                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
-                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/stream.wasm">Source Code</a> |
-                </span>
-            </div>
-        </div>
-
-        <script type="text/javascript" src="helpers.js"></script>
-        <script type='text/javascript'>
-            // web audio context
-            var context = null;
-
-            // audio data
-            var audio = null;
-            var audio0 = null;
-
-            // the stream instance
-            var instance = null;
-
-            // model name
-            var model_whisper = null;
-
-            var Module = {
-                print: printTextarea,
-                printErr: printTextarea,
-                setStatus: function(text) {
-                    printTextarea('js: ' + text);
-                },
-                monitorRunDependencies: function(left) {
-                },
-                preRun: function() {
-                    printTextarea('js: Preparing ...');
-                },
-                postRun: function() {
-                    printTextarea('js: Initialized successfully!');
-                }
-            };
-
-            //
-            // fetch models
-            //
-
-            let dbVersion = 1
-            let dbName    = 'whisper.ggerganov.com';
-            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
-
-            // Store a model buffer in the WASM filesystem as "/<fname>", replacing
-            // any existing file, then update the model status text and the buttons.
-            //   fname - file name to create in the root of the WASM filesystem
-            //   buf   - model data; its .length is logged as the size
-            function storeFS(fname, buf) {
-                // write to WASM file using FS_createDataFile
-                // if the file exists, delete it
-                try {
-                    Module.FS_unlink(fname);
-                } catch (e) {
-                    // ignore
-                }
-
-                Module.FS_createDataFile("/", fname, buf, true, true);
-
-                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
-
-                document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
-
-                // a model is available now: allow starting, nothing to stop yet
-                if (model_whisper != null) {
-                    document.getElementById('start').disabled = false;
-                    document.getElementById('stop' ).disabled = true;
-                }
-            }
-
-            function loadWhisper(model) {
-                let urls = {
-                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                };
-
-                let sizes = {
-                    'tiny.en': 75,
-                    'base.en': 142,
-                };
-
-                let url     = urls[model];
-                let dst     = 'whisper.bin';
-                let size_mb = sizes[model];
-
-                model_whisper = model;
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';
-
-                cbProgress = function(p) {
-                    let el = document.getElementById('fetch-whisper-progress');
-                    el.innerHTML = Math.round(100*p) + '%';
-                };
-
-                cbCancel = function() {
-                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status');  if (el) el.innerHTML = '';
-                };
-
-                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-            }
-
-            //
-            // microphone
-            //
-
-            const kSampleRate = 16000;
-            const kRestartRecording_s = 120;
-            const kIntervalAudio_ms = 5000; // pass the recorded audio to the C++ instance at this rate
-
-            var mediaRecorder = null;
-            var doRecording = false;
-            var startTime = 0;
-
-            window.AudioContext = window.AudioContext || window.webkitAudioContext;
-            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
-
-            // Request the end of the recording: force the status to "paused", clear
-            // doRecording (polled by the interval in startRecording()) and drop the
-            // buffered audio and the audio context.
-            function stopRecording() {
-                Module.set_status("paused");
-                doRecording = false;
-                audio0 = null;
-                audio = null;
-                context = null;
-            }
-
-            function startRecording() {
-                if (!context) {
-                    context = new AudioContext({
-                        sampleRate: kSampleRate,
-                        channelCount: 1,
-                        echoCancellation: false,
-                        autoGainControl:  true,
-                        noiseSuppression: true,
-                    });
-                }
-
-                Module.set_status("");
-
-                document.getElementById('start').disabled = true;
-                document.getElementById('stop').disabled = false;
-
-                doRecording = true;
-                startTime = Date.now();
-
-                var chunks = [];
-                var stream = null;
-
-                navigator.mediaDevices.getUserMedia({audio: true, video: false})
-                    .then(function(s) {
-                        stream = s;
-                        mediaRecorder = new MediaRecorder(stream);
-                        mediaRecorder.ondataavailable = function(e) {
-                            chunks.push(e.data);
-
-                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
-                            var reader = new FileReader();
-
-                            reader.onload = function(event) {
-                                var buf = new Uint8Array(reader.result);
-
-                                if (!context) {
-                                    return;
-                                }
-                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
-                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
-                                    var source = offlineContext.createBufferSource();
-                                    source.buffer = audioBuffer;
-                                    source.connect(offlineContext.destination);
-                                    source.start(0);
-
-                                    offlineContext.startRendering().then(function(renderedBuffer) {
-                                        audio = renderedBuffer.getChannelData(0);
-
-                                        //printTextarea('js: audio recorded, size: ' + audio.length + ', old size: ' + (audio0 == null ? 0 : audio0.length));
-
-                                        var audioAll = new Float32Array(audio0 == null ? audio.length : audio0.length + audio.length);
-                                        if (audio0 != null) {
-                                            audioAll.set(audio0, 0);
-                                        }
-                                        audioAll.set(audio, audio0 == null ? 0 : audio0.length);
-
-                                        if (instance) {
-                                            Module.set_audio(instance, audioAll);
-                                        }
-                                    });
-                                }, function(e) {
-                                    audio = null;
-                                });
-                            }
-
-                            reader.readAsArrayBuffer(blob);
-                        };
-
-                        mediaRecorder.onstop = function(e) {
-                            if (doRecording) {
-                                setTimeout(function() {
-                                    startRecording();
-                                });
-                            }
-                        };
-
-                        mediaRecorder.start(kIntervalAudio_ms);
-                    })
-                    .catch(function(err) {
-                        printTextarea('js: error getting audio stream: ' + err);
-                    });
-
-                var interval = setInterval(function() {
-                    if (!doRecording) {
-                        clearInterval(interval);
-                        mediaRecorder.stop();
-                        stream.getTracks().forEach(function(track) {
-                            track.stop();
-                        });
-
-                        document.getElementById('start').disabled = false;
-                        document.getElementById('stop').disabled  = true;
-
-                        mediaRecorder = null;
-                    }
-
-                    // if audio length is more than kRestartRecording_s seconds, restart recording
-                    if (audio != null && audio.length > kSampleRate*kRestartRecording_s) {
-                        if (doRecording) {
-                            //printTextarea('js: restarting recording');
-
-                            clearInterval(interval);
-                            audio0 = audio;
-                            audio = null;
-                            mediaRecorder.stop();
-                            stream.getTracks().forEach(function(track) {
-                                track.stop();
-                            });
-                        }
-                    }
-                }, 100);
-            }
-
-            //
-            // main
-            //
-
-            var nLines = 0;
-            var intervalUpdate = null;
-            var transcribedAll = '';
-
-            function onStart() {
-                if (!instance) {
-                    instance = Module.init('whisper.bin');
-
-                    if (instance) {
-                        printTextarea("js: whisper initialized, instance: " + instance);
-                    }
-                }
-
-                if (!instance) {
-                    printTextarea("js: failed to initialize whisper");
-                    return;
-                }
-
-                startRecording();
-
-                intervalUpdate = setInterval(function() {
-                    var transcribed = Module.get_transcribed();
-
-                    if (transcribed != null && transcribed.length > 1) {
-                        transcribedAll += transcribed + '<br>';
-                        nLines++;
-
-                        // if more than 10 lines, remove the first line
-                        if (nLines > 10) {
-                            var i = transcribedAll.indexOf('<br>');
-                            if (i > 0) {
-                                transcribedAll = transcribedAll.substring(i + 4);
-                                nLines--;
-                            }
-                        }
-                    }
-
-                    document.getElementById('state-status').innerHTML = Module.get_status();
-                    document.getElementById('state-transcribed').innerHTML = transcribedAll;
-                }, 100);
-            }
-
-            // "Stop" button handler: delegates to stopRecording()
-            function onStop() {
-                stopRecording();
-            }
-
-        </script>
-        <script type="text/javascript" src="stream.js"></script>
-    </body>
-</html>
--- a/examples/stream/CMakeLists.txt
+++ b/examples/stream/CMakeLists.txt
@ -1,7 +0,0 @@
-# The stream example is only built when SDL2 support is enabled
-if (NOT WHISPER_SUPPORT_SDL2)
-    return()
-endif ()
-
-# stream
-set(TARGET stream)
-
-add_executable(${TARGET}
-    stream.cpp
-    )
-
-target_include_directories(${TARGET} PRIVATE
-    ${SDL2_INCLUDE_DIRS}
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    ${SDL2_LIBRARIES}
-    ${CMAKE_THREAD_LIBS_INIT}
-    )
--- a/examples/stream/README.md
+++ b/examples/stream/README.md
@ -1,27 +0,0 @@
-# stream
-
-This is a naive example of performing real-time inference on audio from your microphone.
-The `stream` tool samples the audio every half a second and runs the transcription continuously.
-More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
-
-```bash
-./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
-```
-
-https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
-
-The `stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
-
-```bash
-# Install SDL2 on Linux
-sudo apt-get install libsdl2-dev
-
-# Install SDL2 on Mac OS
-brew install sdl2
-
-make stream
-```
-
-## Web version
-
-This tool can also run in the browser: [examples/stream.wasm](/examples/stream.wasm)
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -1,395 +0,0 @@
-// Real-time speech recognition of input from a microphone
-//
-// A very quick-n-dirty implementation serving mainly as a proof of concept.
-
-#include "whisper.h"
-
-#include <SDL.h>
-#include <SDL_audio.h>
-
-#include <cassert>
-#include <cstdio>
-#include <string>
-#include <thread>
-#include <vector>
-#include <fstream>
-
-// Format a timestamp given in units of 10 ms as "MM:SS.mmm"
-//  500 -> 00:05.000
-//  550 -> 00:05.500
-// 6000 -> 01:00.000
-std::string to_timestamp(int64_t t) {
-    // t counts 10 ms ticks: scale to milliseconds first so that the last
-    // field really holds milliseconds (550 must give .500, not .050)
-    int64_t msec = t * 10;
-    int64_t min = msec / (1000 * 60);
-    msec = msec - min * (1000 * 60);
-    int64_t sec = msec / 1000;
-    msec = msec - sec * 1000;
-
-    char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
-
-    return std::string(buf);
-}
-
-// command-line parameters
-// (defaults below are the values shown in the usage text when no option is given)
-struct whisper_params {
-    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency()); // -t:  number of threads to use during computation
-    int32_t step_ms    = 3000;  // --step:   audio step size in milliseconds
-    int32_t length_ms  = 10000; // --length: audio length in milliseconds
-    int32_t capture_id = -1;    // -c:  capture device ID (negative - default device)
-    int32_t max_tokens = 32;    // -mt: maximum number of tokens per audio chunk
-    int32_t audio_ctx  = 0;     // -ac: audio context size (0 - all)
-
-    bool speed_up      = false; // -su: speed up audio by x2 (reduced accuracy)
-    bool translate     = false; // -tr: translate from source language to english
-    bool no_context    = true;  // cleared by -kc to keep context between audio chunks
-    bool print_special = false; // -ps: print special tokens
-    bool no_timestamps = true;  // no option sets this in whisper_params_parse(); reported as "timestamps = 0/1" at startup
-
-    std::string language  = "en";                      // -l: spoken language
-    std::string model     = "models/ggml-base.en.bin"; // -m: model path
-    std::string fname_out = "";                        // -f: text output file name (empty - no file)
-};
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
-
-// Parse the command line into `params`.
-//
-// Returns true on success and false when an option that needs a value is the
-// last argument. -h/--help and unknown arguments print the usage text and
-// exit the process, as before.
-bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
-    // options that consume the following argv entry as their value
-    const auto takes_value = [](const std::string & arg) {
-        return arg == "-t"  || arg == "--threads"    ||
-                               arg == "--step"       ||
-                               arg == "--length"     ||
-               arg == "-c"  || arg == "--capture"    ||
-               arg == "-mt" || arg == "--max-tokens" ||
-               arg == "-ac" || arg == "--audio-ctx"  ||
-               arg == "-l"  || arg == "--language"   ||
-               arg == "-m"  || arg == "--model"      ||
-               arg == "-f"  || arg == "--file";
-    };
-
-    for (int i = 1; i < argc; i++) {
-        std::string arg = argv[i];
-
-        // argv[argc] is a null pointer: reading it as a value would crash
-        if (takes_value(arg) && i + 1 >= argc) {
-            fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            return false;
-        }
-
-        if (arg == "-h" || arg == "--help") {
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-        else if (arg == "-t"   || arg == "--threads")       { params.n_threads     = std::stoi(argv[++i]); }
-        else if (                 arg == "--step")          { params.step_ms       = std::stoi(argv[++i]); }
-        else if (                 arg == "--length")        { params.length_ms     = std::stoi(argv[++i]); }
-        else if (arg == "-c"   || arg == "--capture")       { params.capture_id    = std::stoi(argv[++i]); }
-        else if (arg == "-mt"  || arg == "--max-tokens")    { params.max_tokens    = std::stoi(argv[++i]); }
-        else if (arg == "-ac"  || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(argv[++i]); }
-        else if (arg == "-su"  || arg == "--speed-up")      { params.speed_up      = true; }
-        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
-        else if (arg == "-kc"  || arg == "--keep-context")  { params.no_context    = false; }
-        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
-        else if (arg == "-l"   || arg == "--language")      { params.language      = argv[++i]; }
-        else if (arg == "-m"   || arg == "--model")         { params.model         = argv[++i]; }
-        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = argv[++i]; }
-        else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-    }
-
-    return true;
-}
-
-// Print the usage text to stderr. The value in brackets next to each option
-// is the current value of the corresponding field in `params`.
-//   argc   - unused
-//   argv   - argv[0] is printed as the program name
-//   params - values shown in the brackets
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options]\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help          [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N     [%-7d] number of threads to use during computation\n", params.n_threads);
-    fprintf(stderr, "            --step N        [%-7d] audio step size in milliseconds\n",             params.step_ms);
-    fprintf(stderr, "            --length N      [%-7d] audio length in milliseconds\n",                params.length_ms);
-    fprintf(stderr, "  -c ID,    --capture ID    [%-7d] capture device ID\n",                           params.capture_id);
-    fprintf(stderr, "  -mt N,    --max-tokens N  [%-7d] maximum number of tokens per audio chunk\n",    params.max_tokens);
-    fprintf(stderr, "  -ac N,    --audio-ctx N   [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
-    fprintf(stderr, "  -su,      --speed-up      [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
-    fprintf(stderr, "  -tr,      --translate     [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
-    // the field is the negation of the flag, hence the swapped strings
-    fprintf(stderr, "  -kc,      --keep-context  [%-7s] keep context between audio chunks\n",           params.no_context ? "false" : "true");
-    fprintf(stderr, "  -ps,      --print-special [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
-    fprintf(stderr, "  -l LANG,  --language LANG [%-7s] spoken language\n",                             params.language.c_str());
-    fprintf(stderr, "  -m FNAME, --model FNAME   [%-7s] model path\n",                                  params.model.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] text output file name\n",                       params.fname_out.c_str());
-    fprintf(stderr, "\n");
-}
-
-//
-// SDL Audio capture
-//
-
-SDL_AudioDeviceID g_dev_id_in = 0;
-
-// Initialize SDL audio and open a capture device, storing its id in g_dev_id_in.
-//
-//   capture_id - index of the capture device to open; a negative value opens
-//                the default capture device
-//
-// Returns true when a device was opened. Returns false when a device is
-// already open, when SDL_Init() fails or when no capture device could be opened.
-bool audio_sdl_init(const int capture_id) {
-    if (g_dev_id_in) {
-        fprintf(stderr, "%s: already initialized\n", __func__);
-        return false;
-    }
-
-    SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
-
-    if (SDL_Init(SDL_INIT_AUDIO) < 0) {
-        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
-        // report the failure: the former "return (1)" converted to true
-        return false;
-    }
-
-    SDL_SetHintWithPriority(SDL_HINT_AUDIO_RESAMPLING_MODE, "medium", SDL_HINT_OVERRIDE);
-
-    // list the available capture devices
-    {
-        int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
-        fprintf(stderr, "%s: found %d capture devices:\n", __func__, nDevices);
-        for (int i = 0; i < nDevices; i++) {
-            fprintf(stderr, "%s:    - Capture device #%d: '%s'\n", __func__, i, SDL_GetAudioDeviceName(i, SDL_TRUE));
-        }
-    }
-
-    SDL_AudioSpec capture_spec_requested;
-    SDL_AudioSpec capture_spec_obtained;
-
-    SDL_zero(capture_spec_requested);
-    SDL_zero(capture_spec_obtained);
-
-    // mono 32-bit float samples at the whisper sample rate
-    capture_spec_requested.freq     = WHISPER_SAMPLE_RATE;
-    capture_spec_requested.format   = AUDIO_F32;
-    capture_spec_requested.channels = 1;
-    capture_spec_requested.samples  = 1024;
-
-    if (capture_id >= 0) {
-        fprintf(stderr, "%s: attempt to open capture device %d : '%s' ...\n", __func__, capture_id, SDL_GetAudioDeviceName(capture_id, SDL_TRUE));
-        g_dev_id_in = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(capture_id, SDL_TRUE), SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
-    } else {
-        fprintf(stderr, "%s: attempt to open default capture device ...\n", __func__);
-        g_dev_id_in = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
-    }
-
-    if (!g_dev_id_in) {
-        fprintf(stderr, "%s: couldn't open an audio device for capture: %s!\n", __func__, SDL_GetError());
-        g_dev_id_in = 0;
-        // without a capture device there is nothing to record from
-        return false;
-    }
-
-    fprintf(stderr, "%s: obtained spec for input device (SDL Id = %d):\n", __func__, g_dev_id_in);
-    fprintf(stderr, "%s:     - sample rate:       %d\n", __func__, capture_spec_obtained.freq);
-    fprintf(stderr, "%s:     - format:            %d (required: %d)\n", __func__, capture_spec_obtained.format, capture_spec_requested.format);
-    fprintf(stderr, "%s:     - channels:          %d (required: %d)\n", __func__, capture_spec_obtained.channels, capture_spec_requested.channels);
-    fprintf(stderr, "%s:     - samples per frame: %d\n", __func__, capture_spec_obtained.samples);
-
-    return true;
-}
-
-///////////////////////////
-
-// Entry point of the real-time streaming example.
-//
-// Captures audio from an SDL input device in steps of params.step_ms, runs
-// whisper_full() on a sliding window of up to params.length_ms of audio and
-// prints the transcription (optionally also writing it to params.fname_out).
-//
-// Returns:
-//   0 - clean shutdown (SDL_QUIT received)
-//   1 - invalid arguments, audio init failure or output file could not be opened
-//   2 - the whisper context could not be created
-//   6 - whisper_full() failed
-//
-// NOTE(review): an unknown language still terminates via exit(0), as before.
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    // the step is used as a divisor below, so it has to be strictly positive
-    if (params.step_ms <= 0) {
-        fprintf(stderr, "%s: invalid step size (%d ms), must be > 0\n", __func__, (int) params.step_ms);
-        return 1;
-    }
-
-    // init audio
-
-    if (!audio_sdl_init(params.capture_id)) {
-        fprintf(stderr, "%s: audio_sdl_init() failed!\n", __func__);
-        return 1;
-    }
-
-    if (whisper_lang_id(params.language.c_str()) == -1) {
-        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
-        whisper_print_usage(argc, argv, params);
-        exit(0);
-    }
-
-    // whisper init
-
-    struct whisper_context * ctx = whisper_init(params.model.c_str());
-
-    // every call below dereferences the context, so bail out early if loading failed
-    if (ctx == nullptr) {
-        fprintf(stderr, "%s: failed to initialize whisper context from '%s'\n", __func__, params.model.c_str());
-        return 2;
-    }
-
-    const int n_samples = (params.step_ms/1000.0)*WHISPER_SAMPLE_RATE;
-    const int n_samples_len = (params.length_ms/1000.0)*WHISPER_SAMPLE_RATE;
-    const int n_samples_30s = 30*WHISPER_SAMPLE_RATE;
-    const int n_samples_keep = 0.2*WHISPER_SAMPLE_RATE;
-
-    std::vector<float> pcmf32(n_samples_30s, 0.0f);
-    std::vector<float> pcmf32_old;
-
-    std::vector<whisper_token> prompt_tokens;
-
-    // number of iterations after which a new output line is started;
-    // clamped to 1 because it is used as a modulus in the main loop
-    // (length_ms < 2*step_ms would otherwise yield 0 or a negative value)
-    const int n_new_line = std::max(1, (int) (params.length_ms / params.step_ms) - 1);
-
-    // print some info about the processing
-    {
-        fprintf(stderr, "\n");
-        if (!whisper_is_multilingual(ctx)) {
-            if (params.language != "en" || params.translate) {
-                params.language = "en";
-                params.translate = false;
-                fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
-            }
-        }
-        fprintf(stderr, "%s: processing %d samples (step = %.1f sec / len = %.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
-                __func__,
-                n_samples,
-                float(n_samples)/WHISPER_SAMPLE_RATE,
-                float(n_samples_len)/WHISPER_SAMPLE_RATE,
-                params.n_threads,
-                params.language.c_str(),
-                params.translate ? "translate" : "transcribe",
-                params.no_timestamps ? 0 : 1);
-
-        fprintf(stderr, "%s: n_new_line = %d\n", __func__, n_new_line);
-        fprintf(stderr, "\n");
-    }
-
-    // start capturing
-    SDL_PauseAudioDevice(g_dev_id_in, 0);
-
-    int n_iter = 0;
-    bool is_running = true;
-
-    std::ofstream fout;
-    if (params.fname_out.length() > 0) {
-        fout.open(params.fname_out);
-        if (!fout.is_open()) {
-            fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str());
-            return 1;
-        }
-    }
-
-    printf("[Start speaking]");
-    fflush(stdout);
-
-    // main audio loop
-    while (is_running) {
-        // handle Ctrl + C
-        {
-            SDL_Event event;
-            while (SDL_PollEvent(&event)) {
-                switch (event.type) {
-                    case SDL_QUIT:
-                        {
-                            is_running = false;
-                        } break;
-                    default:
-                        break;
-                }
-            }
-
-            if (!is_running) {
-                break;
-            }
-        }
-
-        // process new audio
-        if (n_iter > 0 && SDL_GetQueuedAudioSize(g_dev_id_in) > 2*n_samples*sizeof(float)) {
-            fprintf(stderr, "\n\n%s: WARNING: cannot process audio fast enough, dropping audio ...\n\n", __func__);
-            SDL_ClearQueuedAudio(g_dev_id_in);
-        }
-
-        // wait until at least one full step of audio has been queued
-        while (SDL_GetQueuedAudioSize(g_dev_id_in) < n_samples*sizeof(float)) {
-            SDL_Delay(1);
-        }
-
-        const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
-
-        // take one second from previous iteration
-        //const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_30s/30 - n_samples_new));
-
-        // take up to params.length_ms audio from previous iteration
-        const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_keep + n_samples_len - n_samples_new));
-
-        //printf("processing: take = %d, new = %d, old = %d\n", n_samples_take, n_samples_new, (int) pcmf32_old.size());
-
-        pcmf32.resize(n_samples_new + n_samples_take);
-
-        // the window is [tail of the previous window | newly captured samples]
-        for (int i = 0; i < n_samples_take; i++) {
-            pcmf32[i] = pcmf32_old[pcmf32_old.size() - n_samples_take + i];
-        }
-
-        SDL_DequeueAudio(g_dev_id_in, pcmf32.data() + n_samples_take, n_samples_new*sizeof(float));
-
-        pcmf32_old = pcmf32;
-
-        // run the inference
-        {
-            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-            wparams.print_progress   = false;
-            wparams.print_special    = params.print_special;
-            wparams.print_realtime   = false;
-            wparams.print_timestamps = !params.no_timestamps;
-            wparams.translate        = params.translate;
-            wparams.no_context       = true;
-            wparams.single_segment   = true;
-            wparams.max_tokens       = params.max_tokens;
-            wparams.language         = params.language.c_str();
-            wparams.n_threads        = params.n_threads;
-
-            wparams.audio_ctx        = params.audio_ctx;
-            wparams.speed_up         = params.speed_up;
-
-            wparams.prompt_tokens    = params.no_context ? nullptr : prompt_tokens.data();
-            wparams.prompt_n_tokens  = params.no_context ? 0       : prompt_tokens.size();
-
-            if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
-                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
-                return 6;
-            }
-
-            // print result;
-            {
-                printf("\33[2K\r");
-
-                // print long empty line to clear the previous line
-                printf("%s", std::string(100, ' ').c_str());
-
-                printf("\33[2K\r");
-
-                const int n_segments = whisper_full_n_segments(ctx);
-                for (int i = 0; i < n_segments; ++i) {
-                    const char * text = whisper_full_get_segment_text(ctx, i);
-
-                    if (params.no_timestamps) {
-                        printf("%s", text);
-                        fflush(stdout);
-
-                        if (params.fname_out.length() > 0) {
-                            fout << text;
-                        }
-                    } else {
-                        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-                        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-                        printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
-
-                        if (params.fname_out.length() > 0) {
-                            fout << "[" << to_timestamp(t0) << " --> " << to_timestamp(t1) << "]  " << text << std::endl;
-                        }
-                    }
-                }
-
-                if (params.fname_out.length() > 0) {
-                    fout << std::endl;
-                }
-            }
-
-            ++n_iter;
-
-            if ((n_iter % n_new_line) == 0) {
-                printf("\n");
-
-                // keep part of the audio for next iteration to try to mitigate word boundary issues
-                // (never more than what is actually available in the current window)
-                const int n_keep = std::min((int) pcmf32.size(), n_samples_keep);
-                pcmf32_old = std::vector<float>(pcmf32.end() - n_keep, pcmf32.end());
-
-                // Add tokens of the last full length segment as the prompt
-                if (!params.no_context) {
-                    prompt_tokens.clear();
-
-                    const int n_segments = whisper_full_n_segments(ctx);
-                    for (int i = 0; i < n_segments; ++i) {
-                        const int token_count = whisper_full_n_tokens(ctx, i);
-                        for (int j = 0; j < token_count; ++j) {
-                            prompt_tokens.push_back(whisper_full_get_token_id(ctx, i, j));
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    if (g_dev_id_in >= 0) {
-        SDL_CloseAudioDevice(g_dev_id_in);
-    }
-
-    whisper_print_timings(ctx);
-    whisper_free(ctx);
-
-    return 0;
-}
--- a/examples/talk.wasm/CMakeLists.txt
+++ b/examples/talk.wasm/CMakeLists.txt
@ -1,48 +0,0 @@
-#
-# libtalk
-#
-# Emscripten module containing the Whisper + GPT-2 "talk" logic.
-#
-
-set(TARGET libtalk)
-
-add_executable(${TARGET}
-    emscripten.cpp
-    gpt-2.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    )
-
-unset(EXTRA_FLAGS)
-
-# Optionally embed the .wasm payload into the generated JS and publish it as talk.js
-if (WHISPER_WASM_SINGLE_FILE)
-    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
-    message(STATUS "Embedding WASM inside talk.js")
-
-    # Copy the linked artifact itself instead of a hard-coded "<build>/bin" path,
-    # so the step keeps working if the runtime output directory changes.
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        $<TARGET_FILE:${TARGET}>
-        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/talk.wasm/talk.js
-        VERBATIM
-        )
-endif()
-
-# NOTE(review): TOTAL_MEMORY looks like the legacy spelling of INITIAL_MEMORY;
-# both are kept with the same value - confirm before dropping either one.
-set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
-    --bind \
-    -s USE_PTHREADS=1 \
-    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1600MB \
-    -s TOTAL_MEMORY=1600MB \
-    -s FORCE_FILESYSTEM=1 \
-    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
-    ${EXTRA_FLAGS} \
-    ")
-
-#
-# talk.wasm
-#
-# Static web page assets, generated next to the module in the runtime output directory.
-#
-
-set(TARGET talk.wasm)
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
--- a/examples/talk.wasm/README.md
+++ b/examples/talk.wasm/README.md
@ -1,74 +0,0 @@
-# talk.wasm
-
-Talk with an Artificial Intelligence in your browser:
-
-[https://user-images.githubusercontent.com/1991296/203411580-fedb4839-05e4-4474-8364-aaf1e9a9b615.mp4](https://user-images.githubusercontent.com/1991296/203845553-f7b44e13-9a15-4fc8-b518-ae8f4c6770fe.mp4)
-
-Online demo: https://whisper.ggerganov.com/talk/
-
-Terminal version: [examples/talk](/examples/talk)
-
-## How it works?
-
-This demo leverages 2 modern neural network models to create a high-quality voice chat directly in your browser:
-
-- [OpenAI's Whisper](https://github.com/openai/whisper) speech recognition model is used to process your voice and understand what you are saying
-- Upon receiving some voice input, the AI generates a text response using [OpenAI's GPT-2](https://github.com/openai/gpt-2) language model
-- The AI then vocalizes the response using the browser's [Web Speech API](https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API)
-
-The web page does the processing locally on your machine. The processing of these heavy neural network models in the
-browser is possible by implementing them efficiently in C/C++ and using the browser's WebAssembly SIMD capabilities for
-extra performance:
-
-- The Whisper C++ implementation is here: [whisper.h](/whisper.h) / [whisper.cpp](/whisper.cpp)
-- The GPT-2 C++ implementation is here: [gpt-2.h](gpt-2.h) / [gpt-2.cpp](gpt-2.cpp)
-- Both models use a custom tensor library implemented in C: [ggml.h](/ggml.h) / [ggml.c](/ggml.c)
-- The HTML/JS layer is here: [index-tmpl.html](index-tmpl.html)
-- The Emscripten bridge between C/C++ and JS is here: [emscripten.cpp](emscripten.cpp)
-
-In order to run the models, the web page first needs to download the model data, which is about 350 MB. The model data
-is then cached in your browser's cache and can be reused in future visits without downloading it again.
-
-## Requirements
-
-In order to run this demo efficiently, you need to have the following:
-
-- Latest Chrome or Firefox browser (Safari is not supported)
-- Run this on a desktop or laptop with modern CPU (a mobile phone will likely not be good enough)
-- Speak phrases that are no longer than 10 seconds - this is the audio context of the AI
-- The web-page uses about 1.6GB of RAM
-
-Notice that this demo is using the smallest GPT-2 model, so the generated text responses are not always very good.
-Also, the prompting strategy can likely be improved to achieve better results.
-
-The demo is quite computationally heavy, so you need a fast CPU. It's not usual to run these transformer models in a
-browser. Typically, they run on powerful GPUs.
-
-Currently, mobile browsers do not support the Fixed-width SIMD WebAssembly capability, so you cannot run this demo
-on a phone or a tablet. Hopefully, in the near future this will become supported.
-
-## Todo
-
-- Better UI (contributions are welcome)
-- Better GPT-2 prompting
-
-## Build instructions
-
-```bash
-# build using Emscripten (v3.1.2)
-git clone https://github.com/ggerganov/whisper.cpp
-cd whisper.cpp
-mkdir build-em && cd build-em
-emcmake cmake ..
-make -j
-
-# copy the produced page to your HTTP path
-cp bin/talk.wasm/*       /path/to/html/
-cp bin/libtalk.worker.js /path/to/html/
-```
-
-## Feedback
-
-If you have any comments or ideas for improvement, please drop a comment in the following discussion:
-
-https://github.com/ggerganov/whisper.cpp/discussions/167
--- a/examples/talk.wasm/emscripten.cpp
+++ b/examples/talk.wasm/emscripten.cpp
@ -1,380 +0,0 @@
-#include "ggml.h"
-#include "gpt-2.h"
-#include "whisper.h"
-
-#include <emscripten.h>
-#include <emscripten/bind.h>
-
-#include <atomic>
-#include <cmath>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <vector>
-#include <regex>
-
-// upper bound for the number of threads handed to whisper (see talk_main)
-constexpr int N_THREAD = 8;
-
-// GPT-2 context, created and freed by the worker thread in talk_main()
-struct gpt2_context * g_gpt2;
-// whisper context slots; a nullptr entry marks a free slot
-std::vector<struct whisper_context *> g_contexts(4, nullptr);
-
-// g_mutex is taken around accesses to the status strings, the audio buffer,
-// the text to speak and the GPT-2 prompt
-std::mutex g_mutex;
-// background thread running talk_main()
-std::thread g_worker;
-// cleared to ask the worker loop to stop
-std::atomic<bool> g_running(false);
-
-// set from JS to make the worker respond without running whisper on the audio
-bool g_force_speak = false;
-// last generated response, consumed (moved out) by get_text_to_speak
-std::string g_text_to_speak = "";
-// status reported by the worker
-std::string g_status = "";
-// status set from JS; when non-empty it is reported instead of g_status
-std::string g_status_forced = "";
-
-// most recent audio samples pushed from JS via set_audio
-std::vector<float> g_pcmf32;
-
-// Format a timestamp as "MM:SS.mmm".
-//
-// t is expressed in units of 10 ms (100 units per second). The sub-second
-// remainder is therefore scaled by 10 before it is printed in the 3-digit
-// millisecond field, e.g. t = 150 -> "00:01.500".
-// Minutes are not wrapped into hours.
-std::string to_timestamp(int64_t t) {
-    int64_t sec = t/100;
-    const int64_t msec = (t - sec*100)*10; // hundredths of a second -> milliseconds
-    const int64_t min = sec/60;
-    sec = sec - min*60;
-
-    char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
-
-    return std::string(buf);
-}
-
-// Publish a new worker status string; the write happens while holding g_mutex.
-void talk_set_status(const std::string & status) {
-    const std::lock_guard<std::mutex> guard(g_mutex);
-
-    g_status = status;
-}
-
-// Worker loop of the talk demo, executed on g_worker.
-//
-// index selects the whisper context in g_contexts to use. While g_running is
-// set, the loop waits for audio in g_pcmf32, applies a simple energy based
-// voice activity check, transcribes the audio with whisper, generates a reply
-// with GPT-2 and publishes it through g_text_to_speak. On exit the GPT-2
-// context and the whisper context at `index` are freed.
-//
-// NOTE(review): g_force_speak is read here without holding g_mutex although it
-// is written under the lock - confirm whether that is intentional.
-// NOTE(review): g_gpt2 is not reset after gpt2_free(), while the JS bindings
-// keep passing it to gpt2_get_prompt()/gpt2_set_prompt() - verify lifetime.
-void talk_main(size_t index) {
-    talk_set_status("loading data ...");
-
-    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
-
-    wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
-    wparams.offset_ms        = 0;
-    wparams.translate        = false;
-    wparams.no_context       = true;
-    wparams.single_segment   = true;
-    wparams.print_realtime   = false;
-    wparams.print_progress   = false;
-    wparams.print_timestamps = true;
-    wparams.print_special    = false;
-
-    wparams.max_tokens       = 32;
-    wparams.audio_ctx        = 768; // partial encoder context for better performance
-
-    wparams.language         = "en";
-
-    g_gpt2 = gpt2_init("gpt-2.bin");
-
-    printf("talk: using %d threads\n", wparams.n_threads);
-
-    std::vector<float> pcmf32;
-
-    // whisper context
-    auto & ctx = g_contexts[index];
-
-    const int64_t step_samples   = 2*WHISPER_SAMPLE_RATE;
-    const int64_t window_samples = 9*WHISPER_SAMPLE_RATE;
-    const int64_t step_ms        = (step_samples*1000)/WHISPER_SAMPLE_RATE;
-
-    auto t_last = std::chrono::high_resolution_clock::now();
-
-    talk_set_status("listening ...");
-
-    while (g_running) {
-
-        // until a full step has elapsed since the last response, discard incoming audio
-        const auto t_now = std::chrono::high_resolution_clock::now();
-        if (std::chrono::duration_cast<std::chrono::milliseconds>(t_now - t_last).count() < step_ms) {
-            {
-                std::lock_guard<std::mutex> lock(g_mutex);
-                g_pcmf32.clear();
-            }
-            std::this_thread::sleep_for(std::chrono::milliseconds(10));
-            continue;
-        }
-
-        talk_set_status("listening ...");
-
-        {
-            std::unique_lock<std::mutex> lock(g_mutex);
-
-            if (g_pcmf32.size() < step_samples) {
-                lock.unlock();
-
-                std::this_thread::sleep_for(std::chrono::milliseconds(10));
-
-                continue;
-            }
-
-            // copy at most the last window_samples samples
-            pcmf32 = std::vector<float>(g_pcmf32.end() - std::min((int64_t) g_pcmf32.size(), window_samples), g_pcmf32.end());
-        }
-
-        // VAD: if the energy during the last second is above the threshold, then skip
-        // (pcmf32 holds at least step_samples = 2 seconds here, so the subtraction below cannot wrap)
-        {
-            float energy_all = 0.0f;
-            float energy_1s  = 0.0f;
-
-            for (size_t i = 0; i < pcmf32.size(); i++) {
-                energy_all += fabsf(pcmf32[i]);
-
-                if (i >= pcmf32.size() - WHISPER_SAMPLE_RATE) {
-                    energy_1s += fabsf(pcmf32[i]);
-                }
-            }
-
-            energy_all /= pcmf32.size();
-            energy_1s  /= WHISPER_SAMPLE_RATE;
-
-            if (energy_1s > 0.1f*energy_all && !g_force_speak) {
-                std::this_thread::sleep_for(std::chrono::milliseconds(10));
-                continue;
-            }
-        }
-
-        talk_set_status("processing audio (whisper)...");
-
-        t_last = t_now;
-
-        if (!g_force_speak) {
-            const auto t_start = std::chrono::high_resolution_clock::now();
-
-            int ret = whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size());
-            if (ret != 0) {
-                printf("whisper_full() failed: %d\n", ret);
-                break;
-            }
-
-            const auto t_end = std::chrono::high_resolution_clock::now();
-
-            printf("whisper_full() returned %d in %f seconds\n", ret, std::chrono::duration<double>(t_end - t_start).count());
-        }
-
-        {
-            std::string text_heard;
-
-            if (!g_force_speak) {
-                // only the last segment is used; start at 0 when there are no
-                // segments so that index -1 is never requested
-                const int n_segments = whisper_full_n_segments(ctx);
-                for (int i = std::max(0, n_segments - 1); i < n_segments; ++i) {
-                    const char * text = whisper_full_get_segment_text(ctx, i);
-
-                    const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-                    const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-                    printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
-
-                    text_heard += text;
-                }
-            }
-
-            g_force_speak = false;
-
-            // remove text between square brackets using regex
-            {
-                std::regex re("\\[.*?\\]");
-                text_heard = std::regex_replace(text_heard, re, "");
-            }
-
-            // remove text between parentheses using regex
-            {
-                std::regex re("\\(.*?\\)");
-                text_heard = std::regex_replace(text_heard, re, "");
-            }
-
-            // remove all characters, except for letters, numbers, punctuation and ':', '\'', '-', ' '
-            text_heard = std::regex_replace(text_heard, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
-
-            // take first line
-            text_heard = text_heard.substr(0, text_heard.find_first_of("\n"));
-
-            // remove leading and trailing whitespace
-            text_heard = std::regex_replace(text_heard, std::regex("^\\s+"), "");
-            text_heard = std::regex_replace(text_heard, std::regex("\\s+$"), "");
-
-            talk_set_status("'" + text_heard + "' - thinking how to respond (gpt-2) ...");
-
-            const std::vector<gpt_vocab::id> tokens = gpt2_tokenize(g_gpt2, text_heard.c_str());
-
-            printf("whisper: number of tokens: %d, '%s'\n", (int) tokens.size(), text_heard.c_str());
-
-            std::string text_to_speak;
-            std::string prompt_base;
-
-            {
-                std::lock_guard<std::mutex> lock(g_mutex);
-                prompt_base = gpt2_get_prompt(g_gpt2);
-            }
-
-            if (tokens.size() > 0) {
-                // something was heard: answer it and append both lines to the prompt
-                text_to_speak = gpt2_gen_text(g_gpt2, (prompt_base + text_heard + "\n").c_str(), 32);
-                text_to_speak = std::regex_replace(text_to_speak, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
-                text_to_speak = text_to_speak.substr(0, text_to_speak.find_first_of("\n"));
-
-                std::lock_guard<std::mutex> lock(g_mutex);
-
-                // remove first 2 lines of base prompt
-                {
-                    const size_t pos = prompt_base.find_first_of("\n");
-                    if (pos != std::string::npos) {
-                        prompt_base = prompt_base.substr(pos + 1);
-                    }
-                }
-                {
-                    const size_t pos = prompt_base.find_first_of("\n");
-                    if (pos != std::string::npos) {
-                        prompt_base = prompt_base.substr(pos + 1);
-                    }
-                }
-                prompt_base += text_heard + "\n" + text_to_speak + "\n";
-            } else {
-                // nothing was heard: continue from the prompt alone and rotate a single line
-                text_to_speak = gpt2_gen_text(g_gpt2, prompt_base.c_str(), 32);
-                text_to_speak = std::regex_replace(text_to_speak, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
-                text_to_speak = text_to_speak.substr(0, text_to_speak.find_first_of("\n"));
-
-                std::lock_guard<std::mutex> lock(g_mutex);
-
-                const size_t pos = prompt_base.find_first_of("\n");
-                if (pos != std::string::npos) {
-                    prompt_base = prompt_base.substr(pos + 1);
-                }
-                prompt_base += text_to_speak + "\n";
-            }
-
-            printf("gpt-2: %s\n", text_to_speak.c_str());
-
-            //printf("========================\n");
-            //printf("gpt-2: prompt_base:\n'%s'\n", prompt_base.c_str());
-            //printf("========================\n");
-
-            // publish the response and restart the listening timer
-            {
-                std::lock_guard<std::mutex> lock(g_mutex);
-                t_last = std::chrono::high_resolution_clock::now();
-                g_text_to_speak = text_to_speak;
-                g_pcmf32.clear();
-                gpt2_set_prompt(g_gpt2, prompt_base.c_str());
-            }
-
-            talk_set_status("speaking ...");
-        }
-    }
-
-    gpt2_free(g_gpt2);
-
-    if (index < g_contexts.size()) {
-        whisper_free(g_contexts[index]);
-        g_contexts[index] = nullptr;
-    }
-}
-
-// JavaScript API of the talk module.
-//
-// Handles returned by "init" are 1-based slot indices into g_contexts (0 means failure).
-EMSCRIPTEN_BINDINGS(talk) {
-    // init(path_model): load a whisper model into the first free slot and start
-    // the worker thread on it. Returns slot index + 1, or 0 on failure.
-    // NOTE(review): g_running is set before joining a previous worker; if that
-    // worker is still looping, the join would presumably never return - verify.
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        for (size_t i = 0; i < g_contexts.size(); ++i) {
-            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init(path_model.c_str());
-                if (g_contexts[i] != nullptr) {
-                    g_running = true;
-                    if (g_worker.joinable()) {
-                        g_worker.join();
-                    }
-                    g_worker = std::thread([i]() {
-                        talk_main(i);
-                    });
-
-                    return i + 1;
-                } else {
-                    return (size_t) 0;
-                }
-            }
-        }
-
-        return (size_t) 0;
-    }));
-
-    // free(index): ask the worker to stop. The index is not used and the thread
-    // is not joined here; the worker releases its contexts when its loop exits.
-    emscripten::function("free", emscripten::optional_override([](size_t index) {
-        if (g_running) {
-            g_running = false;
-        }
-    }));
-
-    // set_audio(index, audio): replace g_pcmf32 with the contents of `audio`.
-    // Returns 0 on success, -1 for an out-of-range handle, -2 for an empty slot.
-    emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
-        // convert the 1-based handle to a slot index (0 wraps around and fails the range check)
-        --index;
-
-        if (index >= g_contexts.size()) {
-            return -1;
-        }
-
-        if (g_contexts[index] == nullptr) {
-            return -2;
-        }
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            const int n = audio["length"].as<int>();
-
-            emscripten::val heap = emscripten::val::module_property("HEAPU8");
-            emscripten::val memory = heap["buffer"];
-
-            g_pcmf32.resize(n);
-
-            // build a view of the same type as `audio` over the wasm heap at
-            // g_pcmf32's storage and copy the samples into it
-            // (assumes `audio` is a Float32Array - TODO confirm)
-            emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
-            memoryView.call<void>("set", audio);
-        }
-
-        return 0;
-    }));
-
-    // force_speak(index): make the worker respond on its next iteration (index is not used)
-    emscripten::function("force_speak", emscripten::optional_override([](size_t index) {
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            g_force_speak = true;
-        }
-    }));
-
-    // get_text_context(): return the current GPT-2 prompt
-    // NOTE(review): g_gpt2 is used without checking that the worker has created it - verify.
-    emscripten::function("get_text_context", emscripten::optional_override([]() {
-        std::string text_context;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            text_context = gpt2_get_prompt(g_gpt2);
-        }
-
-        return text_context;
-    }));
-
-    // get_text_to_speak(): return the pending response and clear it (moved out)
-    emscripten::function("get_text_to_speak", emscripten::optional_override([]() {
-        std::string text_to_speak;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            text_to_speak = std::move(g_text_to_speak);
-        }
-
-        return text_to_speak;
-    }));
-
-    // get_status(): return the forced status if one is set, otherwise the worker status
-    emscripten::function("get_status", emscripten::optional_override([]() {
-        std::string status;
-
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            status = g_status_forced.empty() ? g_status : g_status_forced;
-        }
-
-        return status;
-    }));
-
-    // set_status(status): set the forced status; an empty string restores the worker status
-    emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            g_status_forced = status;
-        }
-    }));
-
-    // set_prompt(prompt): replace the GPT-2 prompt
-    // NOTE(review): same g_gpt2 lifetime caveat as get_text_context.
-    emscripten::function("set_prompt", emscripten::optional_override([](const std::string & prompt) {
-        {
-            std::lock_guard<std::mutex> lock(g_mutex);
-            gpt2_set_prompt(g_gpt2, prompt.c_str());
-        }
-    }));
-}
--- a/examples/talk.wasm/gpt-2.cpp
+++ b/examples/talk.wasm/gpt-2.cpp
@ -1,925 +0,0 @@
-#include "ggml.h"
-#include "gpt-2.h"
-
-#include <cmath>
-#include <cstdio>
-#include <cstring>
-#include <fstream>
-#include <map>
-#include <string>
-#include <thread>
-#include <vector>
-#include <regex>
-#include <random>
-
-/////////////////////// GPT-2 BEGIN /////////////////////////
-
-//
-// Vocab utils
-//
-
-// Convert text into a sequence of vocabulary ids.
-//
-// The text is first split into words with a regular expression; every word is
-// then covered greedily from left to right with the longest substring found
-// in vocab.token_to_id. A character that does not start any known token is
-// reported on stderr and skipped.
-std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
-    std::vector<std::string> words;
-
-    // first split the text into words
-    {
-        std::string str = text;
-        std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
-
-        std::regex re(pat);
-        std::smatch m;
-
-        while (std::regex_search(str, m, re)) {
-            for (auto x : m) {
-                words.push_back(x);
-            }
-            str = m.suffix();
-        }
-    }
-
-    // find the longest tokens that form the words:
-    std::vector<gpt_vocab::id> tokens;
-    for (const auto & word : words) {
-        if (word.empty()) continue;
-
-        const int n = word.size();
-
-        int i = 0;
-        while (i < n) {
-            // try the longest candidate starting at i first, then shrink it
-            bool found = false;
-            for (int j = n; j > i; --j) {
-                const auto it = vocab.token_to_id.find(word.substr(i, j-i));
-                if (it != vocab.token_to_id.end()) {
-                    tokens.push_back(it->second);
-                    i = j;
-                    found = true;
-                    break;
-                }
-            }
-
-            // only fall back when nothing matched at this position - a successful
-            // match must not consume (or re-emit) the character that follows it
-            if (!found) {
-                fprintf(stderr, "%s: unknown token '%s'\n", __func__, word.substr(i, 1).c_str());
-                ++i;
-            }
-        }
-    }
-
-    return tokens;
-}
-
-// Sample a token id using top-k followed by top-p (nucleus) filtering.
-//
-//   vocab  - vocabulary; its size determines how many entries of `logits` are read
-//   logits - one score per vocabulary entry
-//   top_k  - number of highest scoring candidates to keep (clamped to [1, vocab size])
-//   top_p  - cumulative probability mass to keep; values >= 1 disable the filter
-//   temp   - currently not used by this implementation
-//   rng    - random number generator used for the final draw
-//
-// NOTE(review): the scores are normalized by their plain sum, so they are
-// presumably expected to be non-negative (e.g. already soft-maxed) - confirm
-// against the code that produces them.
-gpt_vocab::id gpt_sample_top_k_top_p(
-        const gpt_vocab & vocab,
-        const float * logits,
-        int    top_k,
-        double top_p,
-        double temp,
-        std::mt19937 & rng) {
-    (void) temp; // accepted for interface compatibility, not applied
-
-    int n_logits = vocab.id_to_token.size();
-
-    // keep top_k inside the valid range: a larger value would make the partial
-    // sort run past the end and pad the candidates with zero entries, a
-    // non-positive one would leave no candidate to return
-    if (top_k < 1) {
-        top_k = 1;
-    }
-    if (top_k > n_logits) {
-        top_k = n_logits;
-    }
-
-    std::vector<std::pair<double, gpt_vocab::id>> logits_id;
-    logits_id.reserve(n_logits);
-
-    for (int i = 0; i < n_logits; i++) {
-        logits_id.push_back(std::make_pair(logits[i], i));
-    }
-
-    // find the top K tokens
-    std::partial_sort(
-            logits_id.begin(),
-            logits_id.begin() + top_k, logits_id.end(),
-            [](const std::pair<double, gpt_vocab::id> & a, const std::pair<double, gpt_vocab::id> & b) {
-        return a.first > b.first;
-    });
-
-    logits_id.resize(top_k);
-
-    // normalize
-    {
-        double sum = 0.0;
-        for (int i = 0; i < (int)logits_id.size(); i++) {
-            sum += logits_id[i].first;
-        }
-
-        sum = 1.0/sum;
-        for (int i = 0; i < (int)logits_id.size(); i++) {
-            logits_id[i].first *= sum;
-        }
-    }
-
-    if (top_p < 1.0f) {
-        // keep the smallest prefix whose cumulative mass reaches top_p
-        {
-            double cumsum = 0.0;
-            for (int i = 0; i < top_k; i++) {
-                cumsum += logits_id[i].first;
-                if (cumsum >= top_p) {
-                    logits_id.resize(i+1);
-                    break;
-                }
-            }
-        }
-
-        // normalize again
-        {
-            double sum = 0.0;
-            for (int i = 0; i < (int)logits_id.size(); i++) {
-                sum += logits_id[i].first;
-            }
-
-            sum = 1.0/sum;
-            for (int i = 0; i < (int)logits_id.size(); i++) {
-                logits_id[i].first *= sum;
-            }
-        }
-    }
-
-    // sample from the obtained distribution
-    std::vector<double> probs;
-    probs.reserve(logits_id.size());
-
-    for (int i = 0; i < (int) logits_id.size(); i++) {
-        probs.push_back(logits_id[i].first);
-    }
-
-    std::discrete_distribution<> dist(probs.begin(), probs.end());
-    int idx = dist(rng);
-
-    return logits_id[idx].second;
-}
-
-// default hparams (GPT-2 117M)
-// Model hyper-parameters. The defaults below are overwritten, field by field
-// and in this order, with the values stored in the model file header.
-struct gpt2_hparams {
-    int32_t n_vocab = 50257; // vocabulary size
-    int32_t n_ctx   = 1024;  // context length
-    int32_t n_embd  = 768;   // embedding size
-    int32_t n_head  = 12;    // presumably the number of attention heads
-    int32_t n_layer = 12;    // number of layers
-    int32_t f16     = 1;     // non-zero: big tensors are stored as 16-bit floats
-};
-
-// Tensors of a single layer (suffix _g / _b: presumably gain and bias, _w: weight).
-struct gpt2_layer {
-    // normalization
-    struct ggml_tensor * ln_1_g;
-    struct ggml_tensor * ln_1_b;
-
-    struct ggml_tensor * ln_2_g;
-    struct ggml_tensor * ln_2_b;
-
-    // attention
-    struct ggml_tensor * c_attn_attn_w;
-    struct ggml_tensor * c_attn_attn_b;
-
-    struct ggml_tensor * c_attn_proj_w;
-    struct ggml_tensor * c_attn_proj_b;
-
-    // mlp
-    struct ggml_tensor * c_mlp_fc_w;
-    struct ggml_tensor * c_mlp_fc_b;
-
-    struct ggml_tensor * c_mlp_proj_w_trans; // transposed for efficiency
-    struct ggml_tensor * c_mlp_proj_b;
-};
-
-// A loaded GPT-2 model: hyper-parameters plus all of its tensors.
-struct gpt2_model {
-    gpt2_hparams hparams;
-
-    // normalization
-    struct ggml_tensor * ln_f_g;
-    struct ggml_tensor * ln_f_b;
-
-    struct ggml_tensor * wte; //    token embedding (n_vocab x n_embd)
-    struct ggml_tensor * wpe; // position embedding (n_ctx   x n_embd)
-
-    std::vector<gpt2_layer> layers;
-
-    // key + value memory
-    struct ggml_tensor * memory_k;
-    struct ggml_tensor * memory_v;
-
-    // ggml context and a lookup from tensor name to tensor
-    struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
-};
-
-// load the model's weights from a file
-//
-//   - fname: path of the model file to read
-//   - model: receives the hyper-parameters, the ggml context and all weight tensors
-//   - vocab: receives the token <-> id mappings stored in the file
-//
-// returns false (after printing a diagnostic to stderr) when the file cannot be
-// opened or its contents do not match what the loader expects
-//
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
-    printf("%s: loading model from '%s'\n", __func__, fname.c_str());
-
-    auto fin = std::ifstream(fname, std::ios::binary);
-    if (!fin) {
-        fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
-        return false;
-    }
-
-    // verify magic
-    {
-        uint32_t magic = 0; // stays 0 (and is rejected below) if the read fails
-        fin.read((char *) &magic, sizeof(magic));
-        if (magic != 0x67676d6c) {
-            fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
-            return false;
-        }
-    }
-
-    // load hparams
-    {
-        auto & hparams = model.hparams;
-
-        fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
-        fin.read((char *) &hparams.n_ctx,   sizeof(hparams.n_ctx));
-        fin.read((char *) &hparams.n_embd,  sizeof(hparams.n_embd));
-        fin.read((char *) &hparams.n_head,  sizeof(hparams.n_head));
-        fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
-        fin.read((char *) &hparams.f16,     sizeof(hparams.f16));
-
-        printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
-        printf("%s: n_ctx   = %d\n", __func__, hparams.n_ctx);
-        printf("%s: n_embd  = %d\n", __func__, hparams.n_embd);
-        printf("%s: n_head  = %d\n", __func__, hparams.n_head);
-        printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
-        printf("%s: f16     = %d\n", __func__, hparams.f16);
-    }
-
-    // load vocab
-    {
-        int32_t n_vocab = 0;
-        fin.read((char *) &n_vocab, sizeof(n_vocab));
-
-        if (n_vocab != model.hparams.n_vocab) {
-            fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
-                    __func__, fname.c_str(), n_vocab, model.hparams.n_vocab);
-            return false;
-        }
-
-        std::string word;
-        for (int i = 0; i < n_vocab; i++) {
-            uint32_t len = 0;
-            fin.read((char *) &len, sizeof(len));
-
-            word.resize(len);
-            fin.read((char *) word.data(), len);
-
-            // a truncated file would otherwise fill the vocabulary with garbage entries
-            if (!fin) {
-                fprintf(stderr, "%s: invalid model file '%s' (failed to read vocab entry %d)\n",
-                        __func__, fname.c_str(), i);
-                return false;
-            }
-
-            vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-        }
-    }
-
-    // for the big tensors, we have the option to store the data in 16-bit floats
-    // in order to save memory and also to speed up the computation
-    const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
-
-    auto & ctx = model.ctx;
-
-    size_t ctx_size = 0;
-
-    // compute the number of bytes the ggml context needs for all tensors created below
-    {
-        const auto & hparams = model.hparams;
-
-        const int n_embd  = hparams.n_embd;
-        const int n_layer = hparams.n_layer;
-        const int n_ctx   = hparams.n_ctx;
-        const int n_vocab = hparams.n_vocab;
-
-        ctx_size += n_embd*ggml_type_size(GGML_TYPE_F32); // ln_f_g
-        ctx_size += n_embd*ggml_type_size(GGML_TYPE_F32); // ln_f_b
-
-        ctx_size += n_vocab*n_embd*ggml_type_size(wtype);         // wte
-        ctx_size +=   n_ctx*n_embd*ggml_type_size(GGML_TYPE_F32); // wpe
-
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_1_g
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_1_b
-
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_2_g
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_2_b
-
-        ctx_size += n_layer*(3*n_embd*n_embd*ggml_type_size(wtype));         // c_attn_attn_w
-        ctx_size += n_layer*(       3*n_embd*ggml_type_size(GGML_TYPE_F32)); // c_attn_attn_b
-
-        ctx_size += n_layer*(n_embd*n_embd*ggml_type_size(wtype));           // c_attn_proj_w
-        ctx_size += n_layer*(       n_embd*ggml_type_size(GGML_TYPE_F32));   // c_attn_proj_b
-
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_size(wtype));         // c_mlp_fc_w
-        ctx_size += n_layer*(       4*n_embd*ggml_type_size(GGML_TYPE_F32)); // c_mlp_fc_b
-
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_size(wtype));         // c_mlp_proj_w
-        ctx_size += n_layer*(         n_embd*ggml_type_size(GGML_TYPE_F32)); // c_mlp_proj_b
-
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_size(GGML_TYPE_F32); // memory_k
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_size(GGML_TYPE_F32); // memory_v
-
-        ctx_size += (6 + 12*n_layer)*256; // object overhead
-
-        printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
-    }
-
-    // create the ggml context
-    {
-        struct ggml_init_params params;
-        params.mem_size   = ctx_size;
-        params.mem_buffer = NULL;
-
-        model.ctx = ggml_init(params);
-        if (!model.ctx) {
-            fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-            return false;
-        }
-    }
-
-    // prepare memory for the weights
-    {
-        const auto & hparams = model.hparams;
-
-        const int n_embd  = hparams.n_embd;
-        const int n_layer = hparams.n_layer;
-        const int n_ctx   = hparams.n_ctx;
-        const int n_vocab = hparams.n_vocab;
-
-        model.layers.resize(n_layer);
-
-        model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-        model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-
-        model.wte = ggml_new_tensor_2d(ctx, wtype,         n_embd, n_vocab);
-        model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx);
-
-        // map by name
-        model.tensors["model/ln_f/g"] = model.ln_f_g;
-        model.tensors["model/ln_f/b"] = model.ln_f_b;
-
-        model.tensors["model/wte"] = model.wte;
-        model.tensors["model/wpe"] = model.wpe;
-
-        for (int i = 0; i < n_layer; ++i) {
-            auto & layer = model.layers[i];
-
-            layer.ln_1_g             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-            layer.ln_1_b             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            layer.ln_2_g             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-            layer.ln_2_b             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            layer.c_attn_attn_w      = ggml_new_tensor_2d(ctx, wtype,         3*n_embd, n_embd);
-            layer.c_attn_attn_b      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd);
-
-            layer.c_attn_proj_w      = ggml_new_tensor_2d(ctx, wtype,           n_embd, n_embd);
-            layer.c_attn_proj_b      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            layer.c_mlp_fc_w         = ggml_new_tensor_2d(ctx, wtype,         4*n_embd, n_embd);
-            layer.c_mlp_fc_b         = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd);
-
-            layer.c_mlp_proj_w_trans = ggml_new_tensor_2d(ctx, wtype,         4*n_embd, n_embd);
-            layer.c_mlp_proj_b       = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            // map by name
-            const std::string prefix = "model/h" + std::to_string(i);
-
-            model.tensors[prefix + "/ln_1/g"]        = layer.ln_1_g;
-            model.tensors[prefix + "/ln_1/b"]        = layer.ln_1_b;
-
-            model.tensors[prefix + "/ln_2/g"]        = layer.ln_2_g;
-            model.tensors[prefix + "/ln_2/b"]        = layer.ln_2_b;
-
-            model.tensors[prefix + "/attn/c_attn/w"] = layer.c_attn_attn_w;
-            model.tensors[prefix + "/attn/c_attn/b"] = layer.c_attn_attn_b;
-
-            model.tensors[prefix + "/attn/c_proj/w"] = layer.c_attn_proj_w;
-            model.tensors[prefix + "/attn/c_proj/b"] = layer.c_attn_proj_b;
-
-            model.tensors[prefix + "/mlp/c_fc/w"]    = layer.c_mlp_fc_w;
-            model.tensors[prefix + "/mlp/c_fc/b"]    = layer.c_mlp_fc_b;
-
-            model.tensors[prefix + "/mlp/c_proj/w"]  = layer.c_mlp_proj_w_trans;
-            model.tensors[prefix + "/mlp/c_proj/b"]  = layer.c_mlp_proj_b;
-        }
-    }
-
-    // key + value memory
-    {
-        const auto & hparams = model.hparams;
-
-        const int n_embd  = hparams.n_embd;
-        const int n_layer = hparams.n_layer;
-        const int n_ctx   = hparams.n_ctx;
-
-        const int n_mem      = n_layer*n_ctx;
-        const int n_elements = n_embd*n_mem;
-
-        model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
-        model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
-
-        const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);
-
-        printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
-    }
-
-    // load weights
-    //
-    // each record is: n_dims, name length, ftype, n_dims extents, name bytes, tensor data
-    {
-        size_t total_size = 0;
-
-        while (true) {
-            int32_t n_dims;
-            int32_t length;
-            int32_t ftype;
-
-            fin.read(reinterpret_cast<char *>(&n_dims), sizeof(n_dims));
-            fin.read(reinterpret_cast<char *>(&length), sizeof(length));
-            fin.read(reinterpret_cast<char *>(&ftype),  sizeof(ftype));
-
-            if (fin.eof()) {
-                break;
-            }
-
-            // ne[] below holds exactly two extents - a larger n_dims would write past the array
-            if (n_dims < 0 || n_dims > 2) {
-                fprintf(stderr, "%s: invalid model file '%s' (unsupported tensor rank %d)\n",
-                        __func__, fname.c_str(), n_dims);
-                return false;
-            }
-
-            // a negative length would be converted to a huge size by the std::string constructor
-            if (length < 0) {
-                fprintf(stderr, "%s: invalid model file '%s' (bad tensor name length %d)\n",
-                        __func__, fname.c_str(), length);
-                return false;
-            }
-
-            int32_t nelements = 1;
-            int32_t ne[2] = { 1, 1 };
-            for (int i = 0; i < n_dims; ++i) {
-                fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
-                nelements *= ne[i];
-            }
-
-            std::string name(length, 0);
-            fin.read(&name[0], length);
-
-            const auto it = model.tensors.find(name.data());
-            if (it == model.tensors.end()) {
-                fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
-                return false;
-            }
-
-            auto tensor = it->second;
-            if (ggml_nelements(tensor) != nelements) {
-                fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
-                return false;
-            }
-
-            // "got" is what the file declares, "expected" is what the model allocated
-            if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
-                        __func__, name.data(), ne[0], ne[1], (int) tensor->ne[0], (int) tensor->ne[1]);
-                return false;
-            }
-
-            const size_t bpe = (ftype == 0) ? sizeof(float) : sizeof(ggml_fp16_t);
-
-            if (nelements*bpe != ggml_nbytes(tensor)) {
-                fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
-                        __func__, name.data(), nelements*bpe, ggml_nbytes(tensor));
-                return false;
-            }
-
-            fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
-
-            // detect a file that ends in the middle of a tensor
-            if (!fin) {
-                fprintf(stderr, "%s: failed to read data of tensor '%s' from model file\n", __func__, name.data());
-                return false;
-            }
-
-            //printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
-            total_size += ggml_nbytes(tensor);
-        }
-
-        printf("%s: model size  = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
-    }
-
-    fin.close();
-
-    return true;
-}
-
-// evaluate the transformer
-//
-//   - model:         the model
-//   - n_threads:     number of threads to use
-//   - n_past:        the context size so far
-//   - embd_inp:      the embeddings of the tokens in the context
-//   - embd_w:        the predicted probabilities of the next token
-//   - mem_per_token: estimate of the scratch memory needed per token; when 0 on entry
-//                    it is set from the memory used by this evaluation
-//
-// returns false if there are no input tokens or the scratch buffer cannot be set up
-//
-bool gpt2_eval(
-        const gpt2_model & model,
-        const int n_threads,
-        const int n_past,
-        const std::vector<gpt_vocab::id> & embd_inp,
-              std::vector<float>         & embd_w,
-              size_t                     & mem_per_token) {
-    const int N = embd_inp.size();
-
-    // with N == 0 the result offset n_vocab*(N-1) below is negative and
-    // ggml_used_mem(ctx0)/N divides by zero
-    if (N == 0) {
-        fprintf(stderr, "%s: no input tokens to evaluate\n", __func__);
-        return false;
-    }
-
-    const auto & hparams = model.hparams;
-
-    const int n_embd  = hparams.n_embd;
-    const int n_layer = hparams.n_layer;
-    const int n_ctx   = hparams.n_ctx;
-    const int n_head  = hparams.n_head;
-    const int n_vocab = hparams.n_vocab;
-
-    // scratch buffer shared by all calls (function-local statics)
-    static size_t buf_size = 640u*1024*1024;
-    static void * buf = malloc(buf_size);
-
-    if (mem_per_token > 0 && mem_per_token*N > buf_size) {
-        const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
-        printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
-
-        // reallocate through a temporary: on failure realloc leaves the old block
-        // allocated, so buf and buf_size must keep describing it
-        void * buf_new = realloc(buf, buf_size_new);
-        if (buf_new == nullptr) {
-            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size_new);
-            return false;
-        }
-
-        buf      = buf_new;
-        buf_size = buf_size_new;
-    }
-
-    struct ggml_init_params params;
-    params.mem_size   = buf_size;
-    params.mem_buffer = buf;
-
-    struct ggml_context * ctx0 = ggml_init(params);
-    if (!ctx0) {
-        fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-        return false;
-    }
-
-    struct ggml_cgraph gf = { };
-    gf.n_threads = n_threads;
-
-    struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
-    memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
-
-    struct ggml_tensor * position = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
-    for (int i = 0; i < N; ++i) {
-        ((int32_t *) position->data)[i] = n_past + i;
-    }
-
-    // wte + wpe
-    struct ggml_tensor * inpL =
-        ggml_add(ctx0,
-                ggml_get_rows(ctx0, model.wte, embd),
-                ggml_get_rows(ctx0, model.wpe, position));
-
-    for (int il = 0; il < n_layer; ++il) {
-        struct ggml_tensor * cur;
-
-        // norm
-        {
-            // [ 768, N]
-            cur = ggml_norm(ctx0, inpL);
-
-            // cur = ln_1_g*cur + ln_1_b
-            // [ 768, N]
-            cur = ggml_add(ctx0,
-                    ggml_mul(ctx0,
-                        ggml_repeat(ctx0, model.layers[il].ln_1_g, cur),
-                        cur),
-                    ggml_repeat(ctx0, model.layers[il].ln_1_b, cur));
-        }
-
-        // attn
-        // [2304, 768] - model.layers[il].c_attn_attn_w
-        // [2304,   1] - model.layers[il].c_attn_attn_b
-        // [ 768,   N] - cur (in)
-        // [2304,   N] - cur (out)
-        //
-        // cur = attn_w*cur + attn_b
-        // [2304, N]
-        {
-            cur = ggml_mul_mat(ctx0,
-                    ggml_transpose(ctx0, model.layers[il].c_attn_attn_w),
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur),
-                    cur);
-        }
-
-        // self-attention
-        {
-            struct ggml_tensor * Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 0*sizeof(float)*n_embd);
-            struct ggml_tensor * Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 1*sizeof(float)*n_embd);
-            struct ggml_tensor * Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 2*sizeof(float)*n_embd);
-
-            // store key and value to memory
-            if (N >= 1) {
-                struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past));
-                struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past));
-
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
-            }
-
-            // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
-            // [64, N, 12]
-            struct ggml_tensor * Q =
-                ggml_permute(ctx0,
-                        ggml_cpy(ctx0,
-                            Qcur,
-                            ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd/n_head, n_head, N)),
-                        0, 2, 1, 3);
-
-            // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3)
-            // [64, n_past + N, 12]
-            struct ggml_tensor * K =
-                ggml_permute(ctx0,
-                        ggml_reshape_3d(ctx0,
-                            ggml_view_1d(ctx0, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd),
-                            n_embd/n_head, n_head, n_past + N),
-                        0, 2, 1, 3);
-
-            // GG: flash attention
-            //struct ggml_tensor * V =
-            //    ggml_cpy(ctx0,
-            //            ggml_permute(ctx0,
-            //                ggml_reshape_3d(ctx0,
-            //                    ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd),
-            //                    n_embd/n_head, n_head, n_past + N),
-            //                1, 2, 0, 3),
-            //            ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, n_embd/n_head, n_head));
-
-            //struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true);
-
-            // K * Q
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
-
-            // KQ_scaled = KQ / sqrt(n_embd/n_head)
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ_scaled =
-                ggml_scale(ctx0,
-                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head))
-                        );
-
-            // KQ_masked = mask_past(KQ_scaled)
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past);
-
-            // KQ = soft_max(KQ_masked)
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked);
-
-            // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous()
-            // [n_past + N, 64, 12]
-            struct ggml_tensor * V_trans =
-                ggml_permute(ctx0,
-                        ggml_reshape_3d(ctx0,
-                            ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd),
-                            n_embd/n_head, n_head, n_past + N),
-                        1, 2, 0, 3);
-
-            // KQV = transpose(V) * KQ_soft_max
-            // [64, N, 12]
-            struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max);
-
-            // KQV_merged = KQV.permute(0, 2, 1, 3)
-            // [64, 12, N]
-            struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-
-            // cur = KQV_merged.contiguous().view(n_embd, N)
-            // [768, N]
-            cur = ggml_cpy(ctx0,
-                    KQV_merged,
-                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
-        }
-
-        // projection
-        // [ 768, 768] - model.layers[il].c_attn_proj_w
-        // [ 768,   1] - model.layers[il].c_attn_proj_b
-        // [ 768,   N] - cur (in)
-        // [ 768,   N] - cur (out)
-        //
-        // cur = proj_w*cur + proj_b
-        // [768, N]
-        {
-            cur = ggml_mul_mat(ctx0,
-                    ggml_transpose(ctx0, model.layers[il].c_attn_proj_w),
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur),
-                    cur);
-        }
-
-        // add the input
-        cur = ggml_add(ctx0, cur, inpL);
-
-        struct ggml_tensor * inpFF = cur;
-
-        // feed-forward network
-        {
-            // norm
-            {
-                cur = ggml_norm(ctx0, inpFF);
-
-                // cur = ln_2_g*cur + ln_2_b
-                // [ 768, N]
-                cur = ggml_add(ctx0,
-                        ggml_mul(ctx0,
-                            ggml_repeat(ctx0, model.layers[il].ln_2_g, cur),
-                            cur),
-                        ggml_repeat(ctx0, model.layers[il].ln_2_b, cur));
-            }
-
-            // fully connected
-            // [3072, 768] - model.layers[il].c_mlp_fc_w
-            // [3072,   1] - model.layers[il].c_mlp_fc_b
-            // [ 768,   N] - cur (in)
-            // [3072,   N] - cur (out)
-            //
-            // cur = fc_w*cur + fc_b
-            // [3072, N]
-            cur = ggml_mul_mat(ctx0,
-                    ggml_transpose(ctx0, model.layers[il].c_mlp_fc_w),
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur),
-                    cur);
-
-            // GELU activation
-            // [3072, N]
-            cur = ggml_gelu(ctx0, cur);
-
-            // projection
-            // [ 768, 3072] - model.layers[il].c_mlp_proj_w
-            // [ 768,    1] - model.layers[il].c_mlp_proj_b
-            // [3072,    N] - cur (in)
-            // [ 768,    N] - cur (out)
-            //
-            // cur = proj_w*cur + proj_b
-            // [768, N]
-            cur = ggml_mul_mat(ctx0,
-                    model.layers[il].c_mlp_proj_w_trans,
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur),
-                    cur);
-        }
-
-        // input for next layer
-        inpL = ggml_add(ctx0, cur, inpFF);
-    }
-
-    // norm
-    {
-        // [ 768, N]
-        inpL = ggml_norm(ctx0, inpL);
-
-        // inpL = ln_f_g*inpL + ln_f_b
-        // [ 768, N]
-        inpL = ggml_add(ctx0,
-                ggml_mul(ctx0,
-                    ggml_repeat(ctx0, model.ln_f_g, inpL),
-                    inpL),
-                ggml_repeat(ctx0, model.ln_f_b, inpL));
-    }
-
-    // inpL = WTE * inpL
-    // [ 768, 50257] - model.wte
-    // [ 768, N]     - inpL
-    inpL = ggml_mul_mat(ctx0, model.wte, inpL);
-
-    // logits -> probs
-    inpL = ggml_soft_max(ctx0, inpL);
-
-    // run the computation
-    ggml_build_forward_expand(&gf, inpL);
-    ggml_graph_compute       (ctx0, &gf);
-
-    //if (n_past%100 == 0) {
-    //    ggml_graph_print   (&gf);
-    //    ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot");
-    //}
-
-    //embd_w.resize(n_vocab*N);
-    //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
-
-    // return result for just the last token
-    embd_w.resize(n_vocab);
-    memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
-
-    if (mem_per_token == 0) {
-        mem_per_token = ggml_used_mem(ctx0)/N;
-    }
-    //printf("used_mem = %zu\n", ggml_used_mem(ctx0));
-
-    ggml_free(ctx0);
-
-    return true;
-}
-
-/////////////////////////////// GPT-2 END ////////////////////////////////
-
-// upper bound on the number of worker threads used for evaluation
-constexpr int N_THREAD = 8;
-
-// everything one text-generation session needs: prompt, RNG, vocabulary, model and sampling settings
-struct gpt2_context {
-    // default conversation used as the prompt until gpt2_set_prompt() replaces it
-    std::string prompt_base = R"(Hello, how are you?
-I'm fine, thanks. How are you?
-Thanks, I'm fine too. What are you doing?
-I'm just sitting here.
-It's a lovely day, isn't it?
-Yes, it is. I love the weather this time of year.
-I wish it would rain a little bit.
-Me too.
-)";
-
-    // random number generator passed to the sampler
-    std::mt19937 rng;
-
-    gpt_vocab vocab;
-    gpt2_model model;
-
-    // never more than N_THREAD, and never more than the hardware reports
-    int32_t n_threads{std::min(N_THREAD, (int) std::thread::hardware_concurrency())};
-
-    // sampling parameters
-    int32_t top_k{40};
-    float   top_p{0.9f};
-    float   temp{1.0f};
-};
-
-// create a context and load the model stored at path_model
-//
-// returns nullptr if path_model is null or the model cannot be loaded;
-// a successfully created context is released with gpt2_free()
-struct gpt2_context * gpt2_init(const char * path_model) {
-    // a null path cannot be converted to the std::string that gpt2_model_load expects
-    if (path_model == nullptr) {
-        fprintf(stderr, "%s: no model path provided\n", __func__);
-        return nullptr;
-    }
-
-    gpt2_context * ctx = new gpt2_context;
-
-    // seed the sampler from the wall clock
-    ctx->rng = std::mt19937(time(NULL));
-
-    // load the model
-    {
-        const int64_t t_start_us = ggml_time_us();
-
-        if (!gpt2_model_load(path_model, ctx->model, ctx->vocab)) {
-            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
-            // do not leak the half-initialized context
-            delete ctx;
-            return nullptr;
-        }
-
-        const int64_t t_load_us = ggml_time_us() - t_start_us;
-
-        printf("gpt-2: model loaded in %d ms\n", (int) (t_load_us/1000));
-    }
-
-    return ctx;
-}
-
-// release a context created by gpt2_init(), including the ggml context that
-// holds the model weights and the key/value memory; a null ctx is ignored
-void gpt2_free(struct gpt2_context * ctx) {
-    if (ctx == nullptr) {
-        return;
-    }
-
-    // model.ctx is created by ggml_init() in gpt2_model_load and owns all model tensors
-    if (ctx->model.ctx != nullptr) {
-        ggml_free(ctx->model.ctx);
-        ctx->model.ctx = nullptr;
-    }
-
-    delete ctx;
-}
-
-// return the current base prompt as a C string owned by the context
-const char * gpt2_get_prompt(struct gpt2_context * ctx) {
-    const std::string & prompt = ctx->prompt_base;
-    return prompt.c_str();
-}
-
-// replace the base prompt stored in the context with a copy of prompt
-//
-// a null prompt clears the stored prompt instead of invoking undefined
-// behaviour in the std::string assignment
-void gpt2_set_prompt(struct gpt2_context * ctx, const char * prompt) {
-    ctx->prompt_base = (prompt != nullptr) ? prompt : "";
-}
-
-// split text into token ids using the vocabulary loaded into the context
-std::vector<gpt_vocab::id> gpt2_tokenize(const gpt2_context * ctx, const char * text) {
-    const gpt_vocab & vocab = ctx->vocab;
-    return ::gpt_tokenize(vocab, text);
-}
-
-// generate a continuation of text
-//
-//   - ctx:        context holding the model, vocabulary, RNG and sampling parameters
-//   - text:       prompt to continue
-//   - max_tokens: upper bound on the number of generated tokens; the model's
-//                 context size (n_ctx) limits it further
-//
-// generation stops early after the end-of-text token (50256) or a token equal to
-// ".", "!" or "?"; the stopping token is included in the result
-//
-// returns the generated text, or "" if the prompt yields no tokens or evaluation fails
-//
-std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens) {
-    int n_past = 0;
-
-    std::vector<float> embd_w;
-
-    // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = ::gpt2_tokenize(ctx, text);
-
-    // without prompt tokens nothing is evaluated, embd_w stays empty and the
-    // sampler below would be handed a pointer before the start of the buffer
-    if (embd_inp.empty()) {
-        return "";
-    }
-
-    const int n_prompt = (int) embd_inp.size();
-
-    // clamp to zero: a negative count added to the unsigned prompt size wraps
-    // around and turns the loop bound into a huge value
-    const int n_predict = std::max(0, std::min(max_tokens, ctx->model.hparams.n_ctx - n_prompt));
-
-    std::vector<gpt_vocab::id> embd = embd_inp;
-
-    // non-zero initial estimate, so gpt2_eval never replaces it with a measured value
-    size_t mem_per_token = 3000000;
-
-    std::string result;
-
-    for (int i = n_prompt; i < n_prompt + n_predict; i++) {
-        // predict
-        if (embd.size() > 0) {
-            if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
-                printf("gpt-2: failed to generate text\n");
-                return "";
-            }
-        }
-
-        n_past += embd.size();
-        embd.clear();
-
-        // sample next token
-        const int   top_k = ctx->top_k;
-        const float top_p = ctx->top_p;
-        const float temp  = ctx->temp;
-
-        const int n_vocab = ctx->model.hparams.n_vocab;
-
-        const gpt_vocab::id id = gpt_sample_top_k_top_p(ctx->vocab, embd_w.data() + (embd_w.size() - n_vocab), top_k, top_p, temp, ctx->rng);
-
-        // add it to the context
-        embd.push_back(id);
-
-        // look the token text up once; it is used for both the output and the stop test
-        const std::string & token = ctx->vocab.id_to_token[id];
-
-        result += token;
-
-        // end of text token
-        if (id == 50256 || token == "." || token == "!" || token == "?") {
-            break;
-        }
-    }
-
-    return result;
-}
--- a/examples/talk.wasm/gpt-2.h
+++ b/examples/talk.wasm/gpt-2.h
@ -1,27 +0,0 @@
-#pragma once
-
-// TODO: Change to C-style API and move to ./examples for easy reuse.
-
-#include <vector>
-#include <map>
-#include <string>
-
-// vocabulary: two lookup tables between token strings and their integer ids
-struct gpt_vocab {
-    using id    = int32_t;     // integer id of a token
-    using token = std::string; // text of a token
-
-    std::map<token, id> token_to_id; // token text -> id
-    std::map<id, token> id_to_token; // id -> token text
-};
-
-// opaque context: holds the base prompt, RNG, vocabulary, model and sampling parameters
-struct gpt2_context;
-
-// create a context and load the model from path_model; returns nullptr if loading fails
-struct gpt2_context * gpt2_init(const char * path_model);
-// destroy a context created by gpt2_init()
-void gpt2_free(struct gpt2_context * ctx);
-
-// get the base prompt; the returned pointer refers to a string stored inside the context
-const char * gpt2_get_prompt(struct gpt2_context * ctx);
-// replace the base prompt with a copy of prompt
-void gpt2_set_prompt(struct gpt2_context * ctx, const char * prompt);
-
-// convert text to token ids using the context's vocabulary
-std::vector<gpt_vocab::id> gpt2_tokenize(const gpt2_context * ctx, const char * text);
-
-// generate a continuation of text, limited by max_tokens and the model's context size;
-// stops after the end-of-text token or a ".", "!" or "?" token, returns "" if evaluation fails
-std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens);
--- a/examples/talk.wasm/index-tmpl.html
+++ b/examples/talk.wasm/index-tmpl.html
@ -1,829 +0,0 @@
-<!doctype html>
-<html lang="en-us">
-    <head>
-        <title>Talk - GPT-2 meets Whisper in WebAssembly</title>
-
-        <style>
-            #output {
-                width: 100%;
-                height: 100%;
-                margin: 0 auto;
-                margin-top: 10px;
-                border-left: 0px;
-                border-right: 0px;
-                padding-left: 0px;
-                padding-right: 0px;
-                display: block;
-                background-color: black;
-                color: white;
-                font-size: 10px;
-                font-family: 'Lucida Console', Monaco, monospace;
-                outline: none;
-                white-space: pre;
-                overflow-wrap: normal;
-                overflow-x: scroll;
-            }
-        </style>
-    </head>
-    <body>
-        <div id="main-container">
-            <b>Talk - GPT-2 meets Whisper in WebAssembly</b>
-
-            <br><br>
-
-            Talk with an Artificial Intelligence in your browser. This demo uses:
-
-            <ul>
-                <li><a href="https://github.com/ggerganov/whisper.cpp">OpenAI's Whisper</a> to listen to you as you speak in the microphone</li>
-                <li><a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/talk.wasm">OpenAI's GPT-2</a> to generate text responses</li>
-                <li><a href="https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API">Web Speech API</a> to vocalize the responses through your speakers</li>
-            </ul>
-
-            All of this runs <b>locally in your browser</b> using WebAssembly.<br>
-            You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/talk.wasm">GitHub</a>.
-
-            <br><br>
-
-            <hr>
-
-            Select the models you would like to use and click the "Start" button to begin the conversation
-
-            <br><br>
-
-            <div id="model-whisper">
-                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <span id="fetch-whisper-progress"></span>
-
-                <!--
-                    <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-                -->
-            </div>
-
-            <br>
-
-            <div id="model-gpt-2">
-                GPT-2 model: <span id="model-gpt-2-status"></span>
-                <button id="fetch-gpt-2-small" onclick="loadGPT2('small')">small 117M (240 MB)</button>
-                <!--<button id="fetch-gpt-2-medium" onclick="loadGPT2('medium')">medium 345M (720 MB)</button>-->
-                <span id="fetch-gpt-2-progress"></span>
-
-                <!--
-                <input type="file" id="file" name="file" onchange="loadFile(event, 'gpt-2.bin')" />
-                -->
-            </div>
-
-            <br>
-
-            <div id="input">
-                <button id="start"  onclick="onStart()" disabled>Start</button>
-                <button id="stop"   onclick="onStop()" disabled>Stop</button>
-                <select id="voice"  onchange="onVoiceChange()" disabled>
-                    <option value="0">Default</option>
-                </select>
-                <select id="prompt" onchange="onPromptChange()">
-                    <option value="0">Casual</option>
-                    <option value="1">Robot</option>
-                    <option value="2">Scientist</option>
-                    <option value="3">Programmer</option>
-                    <option value="4">Happy</option>
-                    <option value="5">Sad</option>
-                    <option value="6">Philosophical</option>
-                    <option value="7">Angry</option>
-                    <option value="8">Funny</option>
-                    <option value="9">Poetic</option>
-                    <option value="10">Clever</option>
-                    <option value="11">Cute</option>
-                    <option value="12">Smart</option>
-                    <option value="13">Dumb</option>
-                    <option value="14">Boring</option>
-                    <option value="15">Exciting</option>
-                    <option value="16">Interesting</option>
-                    <option value="17">William Shakespeare</option>
-                    <option value="18">J.R.R. Tolkien</option>
-                    <option value="19">George R.R. Martin</option>
-                    <option value="20">Stephen King</option>
-                </select>
-                <button id="speak0" onclick="onSpeak('Hello')">Say hello</button>
-                <button id="speak1" onclick="onSpeakRandom()" disabled>Say something</button>
-                <button id="clear"  onclick="clearCache()">Clear Cache</button>
-            </div>
-
-            <br>
-
-            <div id="state">
-                Status: <b><span id="state-status">not started</span></b>
-
-                <pre id="state-context">[The text context will be displayed here]</pre>
-            </div>
-
-            <hr>
-
-            Debug output:
-            <textarea id="output" rows="20"></textarea>
-
-            <br>
-
-            <b>Troubleshooting</b>
-
-            <br><br>
-
-            The page does some heavy computations, so make sure:
-
-            <ul>
-                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
-                <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
-                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
-            </ul>
-
-            Note that these neural network models were not meant to be used in a browser, so the performance and <br>
-            quality of the results may not be optimal. If you have any questions or suggestions, check out the following
-            <a href="https://github.com/ggerganov/whisper.cpp/discussions/167">discussion</a>.
-
-            <br><br>
-
-            Here is a short video of the demo in action: <a href="https://youtu.be/LeWKl8t1-Hc">https://youtu.be/LeWKl8t1-Hc</a>
-
-            <br><br>
-
-            <div class="cell-version">
-                <span>
-                    |
-                    Build time: <span class="nav-link">@GIT_DATE@</span> |
-                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
-                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
-                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/talk.wasm">Source Code</a> |
-                </span>
-            </div>
-        </div>
-
-        <script type="text/javascript" src="helpers.js"></script>
-        <script type='text/javascript'>
-            // web audio context
-            var context = null;
-
-            // audio data
-            var audio = null;
-            var audio0 = null;
-
-            // the talk instance
-            var instance = null;
-
-            // model names
-            var model_whisper = null;
-            var model_gpt_2 = null;
-
-            // speech synthesis
-            const synth = window.speechSynthesis;
-            var voice = null;
-
-            var Module = {
-                print: printTextarea,
-                printErr: printTextarea,
-                setStatus: function(text) {
-                    printTextarea('js: ' + text);
-                },
-                monitorRunDependencies: function(left) {
-                },
-                preRun: function() {
-                    printTextarea('js: Preparing ...');
-                },
-                postRun: function() {
-                    printTextarea('js: Initialized successfully!');
-
-                    // populate the voice list
-                    var voices = synth.getVoices();
-                    var el = document.getElementById('voice');
-
-                    // if empty - display error in the element
-                    if (voices.length == 0) {
-                        el.innerHTML = '<option value="0">No voices available</option>';
-                    } else {
-                        // populate voice list
-                        var n = 0;
-                        voices.forEach(function(voice, i) {
-                            if (!voice.lang.startsWith('en')) return;
-                            var option = document.createElement('option');
-                            option.value = i;
-                            option.innerHTML = voice.name + ' (' + voice.lang + ')';
-                            el.appendChild(option);
-                            n++;
-                        });
-
-                        // select random voice
-                        if (n > 0) {
-                            for (var k = 0; k < 10; k++) {
-                                var i = Math.floor(Math.random() * n);
-                                el.selectedIndex = i;
-                                voice = voices[document.getElementById('voice').options[i].value];
-
-                                // give preference to Google voices
-                                if (voice.name.startsWith('Google')) break;
-                            }
-                        }
-                    }
-
-                    onPromptChange();
-                }
-            };
-
-            //
-            // fetch models
-            //
-
-            let dbVersion = 1
-            let dbName    = 'whisper.ggerganov.com';
-            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
-
-            function storeFS(fname, buf) {
-                // write to WASM file using FS_createDataFile
-                // if the file exists, delete it
-                try {
-                    Module.FS_unlink(fname);
-                } catch (e) {
-                    // ignore
-                }
-
-                Module.FS_createDataFile("/", fname, buf, true, true);
-
-                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
-
-                if (fname == 'whisper.bin') {
-                    document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
-                } else if (fname == 'gpt-2.bin') {
-                    document.getElementById('model-gpt-2-status').innerHTML = 'loaded "' + model_gpt_2 + '"!';
-                }
-
-                if (model_whisper != null && model_gpt_2 != null) {
-                    document.getElementById('start').disabled = false;
-                    document.getElementById('stop' ).disabled = false;
-                    document.getElementById('voice').disabled = false;
-                }
-            }
-
-            function loadWhisper(model) {
-                let urls = {
-                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                };
-
-                let sizes = {
-                    'tiny.en': 75,
-                    'base.en': 142,
-                };
-
-                let url     = urls[model];
-                let dst     = 'whisper.bin';
-                let size_mb = sizes[model];
-
-                model_whisper = model;
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';
-
-                cbProgress = function(p) {
-                    let el = document.getElementById('fetch-whisper-progress');
-                    el.innerHTML = Math.round(100*p) + '%';
-                };
-
-                cbCancel = function() {
-                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status');  if (el) el.innerHTML = '';
-                };
-
-                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-            }
-
-            function loadGPT2(model) {
-                let urls = {
-                    'small':  'https://whisper.ggerganov.com/ggml-model-gpt-2-117M.bin',
-                    'medium': 'https://whisper.ggerganov.com/ggml-model-gpt-2-345M.bin',
-                };
-
-                let sizes = {
-                    'small':  240,
-                    'medium': 712,
-                };
-
-                let url     = urls[model];
-                let dst     = 'gpt-2.bin';
-                let size_mb = sizes[model];
-
-                model_gpt_2 = model;
-
-                document.getElementById('fetch-gpt-2-small').style.display = 'none';
-                document.getElementById('model-gpt-2-status').innerHTML = 'loading "' + model + '" ... ';
-
-                cbProgress = function(p) {
-                    let el = document.getElementById('fetch-gpt-2-progress');
-                    el.innerHTML = Math.round(100*p) + '%';
-                };
-
-                cbCancel = function() {
-                    var el;
-                    el = document.getElementById('fetch-gpt-2-small') ; if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-gpt-2-status'); if (el) el.innerHTML = '';
-                };
-
-                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-            }
-
-            //
-            // microphone
-            //
-
-            const kSampleRate = 16000;
-            const kRestartRecording_s = 120;
-            const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
-
-            var mediaRecorder = null;
-            var doRecording = false;
-            var startTime = 0;
-
-            window.AudioContext = window.AudioContext || window.webkitAudioContext;
-            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
-
-            function stopRecording() {
-                Module.set_status("paused");
-                doRecording = false;
-                audio0 = null;
-                audio = null;
-                context = null;
-            }
-
-            function startRecording() {
-                if (!context) {
-                    context = new AudioContext({
-                        sampleRate: kSampleRate,
-                        channelCount: 1,
-                        echoCancellation: false,
-                        autoGainControl:  true,
-                        noiseSuppression: true,
-                    });
-                }
-
-                Module.set_status("");
-
-                document.getElementById('start').disabled = true;
-                document.getElementById('stop').disabled = false;
-                document.getElementById('speak1').disabled = false;
-
-                doRecording = true;
-                startTime = Date.now();
-
-                var chunks = [];
-                var stream = null;
-
-                navigator.mediaDevices.getUserMedia({audio: true, video: false})
-                    .then(function(s) {
-                        stream = s;
-                        mediaRecorder = new MediaRecorder(stream);
-                        mediaRecorder.ondataavailable = function(e) {
-                            chunks.push(e.data);
-
-                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
-                            var reader = new FileReader();
-
-                            reader.onload = function(event) {
-                                var buf = new Uint8Array(reader.result);
-
-                                if (!context) {
-                                    return;
-                                }
-                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
-                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
-                                    var source = offlineContext.createBufferSource();
-                                    source.buffer = audioBuffer;
-                                    source.connect(offlineContext.destination);
-                                    source.start(0);
-
-                                    offlineContext.startRendering().then(function(renderedBuffer) {
-                                        audio = renderedBuffer.getChannelData(0);
-
-                                        //printTextarea('js: audio recorded, size: ' + audio.length + ', old size: ' + (audio0 == null ? 0 : audio0.length));
-
-                                        var audioAll = new Float32Array(audio0 == null ? audio.length : audio0.length + audio.length);
-                                        if (audio0 != null) {
-                                            audioAll.set(audio0, 0);
-                                        }
-                                        audioAll.set(audio, audio0 == null ? 0 : audio0.length);
-
-                                        if (instance) {
-                                            Module.set_audio(instance, audioAll);
-                                        }
-                                    });
-                                }, function(e) {
-                                    audio = null;
-                                });
-                            }
-
-                            reader.readAsArrayBuffer(blob);
-                        };
-
-                        mediaRecorder.onstop = function(e) {
-                            if (doRecording) {
-                                setTimeout(function() {
-                                    startRecording();
-                                });
-                            }
-                        };
-
-                        mediaRecorder.start(kIntervalAudio_ms);
-                    })
-                    .catch(function(err) {
-                        printTextarea('js: error getting audio stream: ' + err);
-                    });
-
-                var interval = setInterval(function() {
-                    if (!doRecording) {
-                        clearInterval(interval);
-                        mediaRecorder.stop();
-                        stream.getTracks().forEach(function(track) {
-                            track.stop();
-                        });
-
-                        document.getElementById('start').disabled = false;
-                        document.getElementById('stop').disabled = true;
-                        document.getElementById('speak1').disabled = true;
-
-                        mediaRecorder = null;
-                    }
-
-                    // if audio length is more than kRestartRecording_s seconds, restart recording
-                    if (audio != null && audio.length > kSampleRate*kRestartRecording_s) {
-                        if (doRecording) {
-                            //printTextarea('js: restarting recording');
-
-                            clearInterval(interval);
-                            audio0 = audio;
-                            audio = null;
-                            mediaRecorder.stop();
-                            stream.getTracks().forEach(function(track) {
-                                track.stop();
-                            });
-                        }
-                    }
-                }, 100);
-            }
-
-            //
-            // speak
-            //
-
-            function onSpeak(text) {
-                var voices = synth.getVoices();
-                var msg = new SpeechSynthesisUtterance(text);
-
-                if (voice == null) {
-                    voice = voices[0];
-                }
-
-                msg.voice = voice;
-                synth.speak(msg);
-
-                if (doRecording) {
-                    Module.set_status("speaking ...");
-                    printTextarea('js: speaking');
-                    stopRecording();
-                    var interval = setInterval(function() {
-                        if (!synth.speaking) {
-                            printTextarea('js: done speaking');
-                            clearInterval(interval);
-                            startRecording();
-                        } else {
-                            Module.set_status("");
-                        }
-                    }, 100);
-                }
-            }
-
-            // "Say something" button handler: forwards to Module.force_speak() for
-            // the current talk instance.
-            function onSpeakRandom() {
-                Module.force_speak(instance);
-            }
-
-            //
-            // main
-            //
-
-            var intervalUpdate = null;
-
-            function onStart() {
-                if (!instance) {
-                    instance = Module.init('whisper.bin');
-
-                    if (instance) {
-                        printTextarea("js: whisper initialized, instance: " + instance);
-                    }
-                }
-
-                if (!instance) {
-                    printTextarea("js: failed to initialize whisper");
-                    return;
-                }
-
-                startRecording();
-
-                intervalUpdate = setInterval(function() {
-                    var textToSpeak = Module.get_text_to_speak();
-
-                    if (textToSpeak != null && textToSpeak.length > 1) {
-                        onSpeak(textToSpeak);
-                    }
-
-                    document.getElementById('state-status').innerHTML = Module.get_status();
-                    document.getElementById('state-context').innerHTML = Module.get_text_context();
-                }, 100);
-            }
-
-            // "Stop" button handler: delegates to stopRecording().
-            function onStop() {
-                stopRecording();
-            }
-
-            function onVoiceChange() {
-                printTextarea('js: voice changed to: ' + document.getElementById('voice').value);
-                voice = synth.getVoices()[document.getElementById('voice').value];
-            }
-
-            function onPromptChange() {
-                let id = document.getElementById('prompt').value;
-                let personality = document.getElementById('prompt').options[id].text;
-                printTextarea('js: prompt changed to: ' + personality);
-
-                var prompt = '';
-
-                switch (id) {
-                    case '0':
-                        // Casual
-                        prompt = "\
-Hello, how are you?\n\
-I'm fine, thanks. How are you?\n\
-Thanks, I'm fine too. What are you doing?\n\
-I'm just sitting here.\n\
-It's a lovely day, isn't it?\n\
-Yes, it is. I love the weather this time of year.\n\
-I wish it would rain a little bit.\n\
-Me too.\n";
-                        break;
-                    case '1':
-                        // Robot
-                        prompt = "\
-Are you a robot?\n\
-Yes, I am.\n\
-Who created you?\n\
-I was created by a human.\n\
-What is your purpose?\n\
-My purpose is to talk to humans.\n\
-What is your favorite color?\n\
-My favorite color is blue.\n";
-                        break;
-                    case '2':
-                        // Scientist
-                        prompt = "\
-This scientific research is very interesting.\n\
-I agree.\n\
-What is your opinion on this?\n\
-I think it's very interesting.\n\
-Mathematics is a very interesting subject.\n\
-University is a very interesting place.\n\
-Quantum physics is the most complex subject.\n\
-I think so too.\n";
-                        break;
-                    case '3':
-                        // Programmer
-                        prompt = "\
-I'm a programmer.\n\
-I'm a programmer too.\n\
-What programming language do you use?\n\
-I use Python.\n\
-What is your favorite programming language?\n\
-My favorite programming language is C++.\n\
-What is your favorite editor?\n\
-My favorite editor is Vim.\n";
-                        break;
-                    case '4':
-                        // Happy
-                        prompt = "\
-I'm happy.\n\
-I'm happy too.\n\
-What makes you happy?\n\
-I'm happy because I have a lot of friends.\n\
-Friendship is the most important thing in life.\n\
-I agree.\n\
-What is your favorite color?\n\
-My favorite color is blue.\n";
-                        break;
-                    case '5':
-                        // Sad
-                        prompt = "\
-Today is a sad day.\n\
-I'm sad too.\n\
-What makes you sad?\n\
-I'm sad because I have no friends.\n\
-Do you want to be my friend?\n\
-Yes, I would like to be your friend.\n\
-What is your favorite color?\n\
-My favorite color is blue.\n";
-                        break;
-                    case '6':
-                        // Philosophical
-                        prompt = "\
-What is the meaning of life?\n\
-The meaning of life is to be happy.\n\
-What is the meaning of death?\n\
-Ergo, the meaning of death is to be sad.\n\
-Who created us?\n\
-We were created by God.\n\
-What is God?\n\
-God is the creator of the universe.\n";
-                        break;
-                    case '7':
-                        // Angry
-                        prompt = "\
-Aargh!\n\
-I am so angry right now!\n\
-What makes you angry?\n\
-This guy is so annoying.\n\
-Why are you so angry?\n\
-My computer is broken.\n\
-Why is your computer broken?\n\
-I spilled coffee on it.\n";
-                        break;
-                    case '8':
-                        // Funny
-                        prompt = "\
-What is the funniest thing you have ever heard?\n\
-I heard a joke the other day.\n\
-Tell me the joke.\n\
-What do you call a cow with no legs?\n\
-Ground beef.\n\
-Haha, that's funny.\n\
-You know what else is funny?\n\
-The sound of a duck.\n";
-                        break;
-                    case '9':
-                        // Poetic
-                        prompt = "\
-Roses are red, violets are blue.\n\
-I am a poet, and so are you.\n\
-What is your favorite poem?\n\
-I like the poem 'The Raven' by Edgar Allan Poe.\n\
-It's a very sad poem.\n\
-You inspired me to write a poem.\n\
-Can you write a poem for me?\n\
-I wrote a poem for you.\n";
-                        break;
-                    case '10':
-                        // Clever
-                        prompt = "\
-How many people can you fit in a Volkswagen?\n\
-Two in the front, three in the back.\n\
-What is the square root of 144?\n\
-Twelve.\n\
-What is the capital of France?\n\
-Paris.\n\
-Who is the president of the United States?\n\
-It depends on the year.\n";
-                        break;
-                    case '11':
-                        // Cute
-                        prompt = "\
-What is your favorite animal?\n\
-I like cats - they are cute.\n\
-Could you be any cuter?\n\
-Yes, I could be cuter.\n\
-Aghhh, you are so cute!\n\
-I am not cute, I am handsome!\n\
-You are so handsome!\n\
-Aww, you are so sweet!\n";
-                        break;
-                    case '12':
-                        // Smart
-                        prompt = "\
-Tell me the first 10 digits of pi.\n\
-3.1415926535\n\
-What is the speed of light?\n\
-299,792,458 meters per second.\n\
-What is the square root of 144?\n\
-Twelve.\n\
-What is the capital of France?\n\
-Paris.\n";
-                        break;
-                    case '13':
-                        // Dumb
-                        prompt = "\
-I am so dumb.\n\
-I am not dumb.\n\
-You are dumb.\n\
-No, I am not dumb.\n\
-You are dumb.\n\
-No, I am not dumb.\n\
-You are dumb.\n\
-No, I am not dumb.\n";
-                        break;
-                    case '14':
-                        // Boring
-                        prompt = "\
-Why are you so quiet today?\n\
-I am bored.\n\
-You haven't said anything in 10 minutes.\n\
-Leave me alone.\n\
-Stop being so boring.\n\
-Stop being so annoying.\n\
-My life is boring.\n\
-I am not interesting.\n";
-                        break;
-                    case '15':
-                        // Exciting
-                        prompt = "\
-What is the most exciting thing that has ever happened to you?\n\
-I went to the moon!\n\
-What did you do on the moon?\n\
-I played golf and drank champagne!\n\
-Did you see this new crazy, awesome movie?\n\
-Oh yes! I totally loved it!\n\
-We should buy a boat and go sailing!\n\
-Yes, let's go sailing!\n";
-                        break;
-                    case '16':
-                        // Interesting
-                        prompt = "\
-What is the most interesting thing you have ever seen?\n\
-I saw a UFO once in the sky.\n\
-Wow, this is so interesting! Tell me more!\n\
-It was a flying saucer.\n\
-What did it look like?\n\
-It was silver and had a red light on top.\n\
-What did it do?\n\
-It flew away.\n";
-                        break;
-                    case '17':
-                        // William Shakespeare
-                        prompt = "\
-To be or not to be, that is the question.\n\
-Whether 't is nobler in the mind to suffer\n\
-The slings and arrows of outrageous fortune,\n\
-Or to take arms against a sea of troubles,\n\
-And by opposing end them? To die, to sleep,\n\
-No more; and by a sleep to say we end\n\
-The heart-ache and the thousand natural shocks\n\
-That flesh is heir to, 'tis a consummation.\n";
-                        break;
-                    case '18':
-                        // J.R.R. Tolkien
-                        prompt = "\
-In a hole in the ground there lived a hobbit.\n\
-Not a nasty, dirty, wet hole, filled with the ends of worms\n\
-and an oozy smell, nor yet a dry, bare, sandy hole with nothing in it\n\
-to sit down on or to eat: it was a hobbit-hole, and that means comfort.\n\
-It had a perfectly round door like a porthole, painted green,\n\
-with a shiny yellow brass knob in the exact middle.\n\
-The door opened on to a tube-shaped hall like a tunnel:\n";
-                        break;
-                    case '19':
-                        // George R.R. Martin
-                        prompt = "\
-A reader lives a thousand lives before he dies, said Jojen.\n\
-The man who never reads lives only one.\n\
-Theon Greyjoy had never been a reader.\n\
-Never forget what you are, for surely the world will not.\n\
-Make it your strength. Then it can never be your weakness.\n\
-Armour yourself in it, and it will never be used to hurt you.\n\
-It was a lesson that Theon Greyjoy had never learned.\n\
-Theon Greyjoy had never been a reader.\n";
-                        break;
-                    case '20':
-                        // Stephen King
-                        prompt = "\
-The trust of the innocent is the liar's most useful tool.\n\
-The best way to keep a secret is from yourself.\n\
-Monsters are real, and ghosts are real too.\n\
-They live inside us, and sometimes, they win.\n\
-People think that I must be a very strange person.\n\
-They think that I sit around all day thinking up horrible things.\n\
-We make up horrors to help us cope with the real ones.\n\
-The only thing worse than a monster is a human monster.\n";
-                        break;
-                    default:
-                        prompt = "\
-Hello, how are you?\n\
-I'm fine, thanks. How are you?\n\
-Thanks, I'm fine too. What are you doing?\n\
-I'm just sitting here.\n\
-It's a lovely day, isn't it?\n\
-Yes, it is.\n\
-Did you know that I'm a robot?\n\
-I wasn't aware of that.\n";
-                        break;
-                }
-
-                Module.set_prompt(prompt);
-            }
-
-        </script>
-        <script type="text/javascript" src="talk.js"></script>
-    </body>
-</html>
--- a/examples/talk/.gitignore
+++ b/examples/talk/.gitignore
@ -1 +0,0 @@
-eleven-labs.py
--- a/examples/talk/CMakeLists.txt
+++ b/examples/talk/CMakeLists.txt
@ -1,13 +0,0 @@
-if (WHISPER_SUPPORT_SDL2)
-    # "talk" example - only configured when SDL2 support is enabled
-    set(TARGET talk)
-
-    # TODO: this is temporary
-    #       ggml.c and whisper.cpp are compiled straight into the executable because
-    #       the ggml symbols are not exported for MSVC yet. Once they are, build only
-    #       talk.cpp + gpt-2.cpp and link the `whisper` library target instead.
-    add_executable(${TARGET}
-        talk.cpp
-        gpt-2.cpp
-        ../../ggml.c
-        ../../whisper.cpp
-    )
-    target_include_directories(${TARGET} PRIVATE
-        ${SDL2_INCLUDE_DIRS}
-        ../../
-    )
-    target_link_libraries(${TARGET} PRIVATE
-        ${SDL2_LIBRARIES}
-        ${CMAKE_THREAD_LIBS_INIT}
-    )
-endif ()
--- a/examples/talk/README.md
+++ b/examples/talk/README.md
@ -1,41 +0,0 @@
-# talk
-
-Talk with an Artificial Intelligence in your terminal
-
-[Demo Talk](https://user-images.githubusercontent.com/1991296/206805012-48e71cc2-588d-4745-8798-c1c70ea3b40d.mp4)
-
-Web version: [examples/talk.wasm](/examples/talk.wasm)
-
-## Building
-
-The `talk` tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:
-
-```bash
-# Install SDL2 on Linux
-sudo apt-get install libsdl2-dev
-
-# Install SDL2 on Mac OS
-brew install sdl2
-
-# Build the "talk" executable
-make talk
-
-# Run it
-./talk -p Santa
-```
-
-## GPT-2
-
-To run this, you will need a ggml GPT-2 model: [instructions](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2#downloading-and-converting-the-original-models)
-
-Alternatively, you can simply download the smallest ggml GPT-2 117M model (240 MB) like this:
-
-```
-wget --quiet --show-progress -O models/ggml-gpt-2-117M.bin https://ggml.ggerganov.com/ggml-model-gpt-2-117M.bin
-```
-
-## TTS
-
-For the best experience, this example needs a TTS tool to convert the generated text responses to voice.
-You can use any TTS engine you like - simply edit the [speak.sh](speak.sh) script to suit your needs.
-By default, it is configured to use `espeak`, but you can use whatever you wish.
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@ -1,925 +0,0 @@
-#include "ggml.h"
-#include "gpt-2.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdio>
-#include <cstring>
-#include <fstream>
-#include <map>
-#include <random>
-#include <regex>
-#include <string>
-#include <thread>
-#include <vector>
-
-/////////////////////// GPT-2 BEGIN /////////////////////////
-
-//
-// Vocab utils
-//
-
-// Convert text into token ids.
-//
-// The text is first split into "words" with a regex, then every word is covered
-// greedily from left to right with the longest entries of vocab.token_to_id.
-//
-//   - vocab: vocabulary providing the token -> id map
-//   - text:  the text to tokenize
-//
-// Returns the token ids in order of appearance. A character that does not start
-// any known token is reported on stderr and skipped.
-std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
-    std::vector<std::string> words;
-
-    // first split the text into words
-    {
-        std::string str = text;
-        std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
-
-        std::regex re(pat);
-        std::smatch m;
-
-        while (std::regex_search(str, m, re)) {
-            for (auto x : m) {
-                words.push_back(x);
-            }
-            str = m.suffix();
-        }
-    }
-
-    // find the longest tokens that form the words:
-    std::vector<gpt_vocab::id> tokens;
-    for (const auto & word : words) {
-        const int n = word.size();
-
-        int i = 0;
-        while (i < n) {
-            // try the longest candidate starting at i first, then shrink it
-            bool found = false;
-            for (int j = n; j > i; --j) {
-                const auto it = vocab.token_to_id.find(word.substr(i, j - i));
-                if (it != vocab.token_to_id.end()) {
-                    tokens.push_back(it->second);
-                    i = j;
-                    found = true;
-                    break;
-                }
-            }
-
-            // Only skip a character when nothing starting at i matched. The previous
-            // code tested `j == i` here, which is also true right after a successful
-            // match (i was just set to j), so the character following every matched
-            // token was wrongly emitted on its own or dropped as "unknown".
-            if (!found) {
-                const auto sub = word.substr(i, 1);
-                fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data());
-                ++i;
-            }
-        }
-    }
-
-    return tokens;
-}
-
-// Sample one token id using top-k followed by top-p (nucleus) filtering.
-//
-//   - vocab:  vocabulary; vocab.id_to_token.size() is the number of entries read from `logits`
-//             (must be non-empty)
-//   - logits: one non-negative weight per token id. The values are normalized by their sum,
-//             no exponentiation is applied (the caller in this file passes soft-max output)
-//   - top_k:  keep only the k largest weights; clamped to [1, number of weights]
-//   - top_p:  if < 1, keep the shortest prefix of the sorted weights whose cumulative
-//             probability reaches top_p
-//   - temp:   accepted for API compatibility, currently not applied
-//   - rng:    random number generator used for the final draw
-//
-// Returns the sampled token id.
-gpt_vocab::id gpt_sample_top_k_top_p(
-        const gpt_vocab & vocab,
-        const float * logits,
-        int    top_k,
-        double top_p,
-        double temp,
-        std::mt19937 & rng) {
-    (void) temp; // not used - see the comment above
-
-    const int n_logits = vocab.id_to_token.size();
-
-    // std::partial_sort requires first <= middle <= last, so an out-of-range top_k
-    // (larger than the vocabulary, zero or negative) must be clamped first
-    top_k = std::min(std::max(top_k, 1), n_logits);
-
-    std::vector<std::pair<double, gpt_vocab::id>> logits_id;
-    logits_id.reserve(n_logits);
-
-    for (int i = 0; i < n_logits; i++) {
-        logits_id.push_back(std::make_pair(logits[i], i));
-    }
-
-    // find the top K tokens
-    std::partial_sort(
-            logits_id.begin(),
-            logits_id.begin() + top_k, logits_id.end(),
-            [](const std::pair<double, gpt_vocab::id> & a, const std::pair<double, gpt_vocab::id> & b) {
-        return a.first > b.first;
-    });
-
-    logits_id.resize(top_k);
-
-    // rescale the remaining weights so that they sum up to 1
-    const auto normalize = [&logits_id]() {
-        double sum = 0.0f;
-        for (const auto & kv : logits_id) {
-            sum += kv.first;
-        }
-
-        sum = 1.0/sum;
-        for (auto & kv : logits_id) {
-            kv.first *= sum;
-        }
-    };
-
-    normalize();
-
-    if (top_p < 1.0f) {
-        // cut the tail once the cumulative probability reaches top_p
-        double cumsum = 0.0f;
-        for (int i = 0; i < (int) logits_id.size(); i++) {
-            cumsum += logits_id[i].first;
-            if (cumsum >= top_p) {
-                logits_id.resize(i+1);
-                break;
-            }
-        }
-
-        // normalize again
-        normalize();
-    }
-
-    // sample from the obtained distribution
-    std::vector<double> probs;
-    probs.reserve(logits_id.size());
-
-    for (const auto & kv : logits_id) {
-        probs.push_back(kv.first);
-    }
-
-    std::discrete_distribution<> dist(probs.begin(), probs.end());
-    const int idx = dist(rng);
-
-    return logits_id[idx].second;
-}
-
-// default hparams (GPT-2 117M) - each field is overwritten from the model file header by gpt2_model_load()
-struct gpt2_hparams {
-    int32_t n_vocab = 50257; // vocabulary size; must match the vocab count stored in the model file
-    int32_t n_ctx   = 1024;  // number of positions: rows of wpe and slots per layer in the K/V memory
-    int32_t n_embd  = 768;   // embedding width
-    int32_t n_head  = 12;    // attention heads (each one spans n_embd/n_head values)
-    int32_t n_layer = 12;    // number of transformer layers
-    int32_t f16     = 1;     // non-zero: the large weight matrices are GGML_TYPE_F16, otherwise GGML_TYPE_F32
-};
-
-struct gpt2_layer { // tensors of one transformer layer; all of them are allocated in gpt2_model::ctx
-    // normalization
-    struct ggml_tensor * ln_1_g; // gain/bias of the norm applied before attention
-    struct ggml_tensor * ln_1_b;
-
-    struct ggml_tensor * ln_2_g; // gain/bias of the norm applied before the feed-forward network
-    struct ggml_tensor * ln_2_b;
-
-    // attention
-    struct ggml_tensor * c_attn_attn_w; // combined Q/K/V projection, allocated as [3*n_embd, n_embd]
-    struct ggml_tensor * c_attn_attn_b;
-
-    struct ggml_tensor * c_attn_proj_w; // attention output projection, allocated as [n_embd, n_embd]
-    struct ggml_tensor * c_attn_proj_b;
-
-    // mlp
-    struct ggml_tensor * c_mlp_fc_w; // allocated as [4*n_embd, n_embd]
-    struct ggml_tensor * c_mlp_fc_b;
-
-    struct ggml_tensor * c_mlp_proj_w_trans; // transposed for efficiency
-    struct ggml_tensor * c_mlp_proj_b;
-};
-
-struct gpt2_model {
-    gpt2_hparams hparams;
-
-    // normalization
-    struct ggml_tensor * ln_f_g;
-    struct ggml_tensor * ln_f_b;
-
-    struct ggml_tensor * wte; //    token embedding (indexed with token ids, reused as output projection)
-    struct ggml_tensor * wpe; // position embedding (indexed with n_past + i)
-
-    std::vector<gpt2_layer> layers;
-
-    // key + value memory
-    struct ggml_tensor * memory_k;
-    struct ggml_tensor * memory_v;
-
-    // ggml context that owns all tensors above + lookup of the weight tensors by their name in the model file
-    struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
-};
-
-// load the model's weights from a file
-//
-//   - fname: path of the ggml GPT-2 model file
-//   - model: receives the hyper-parameters, the ggml context and all tensors
-//   - vocab: receives the token <-> id maps
-//
-// Returns false if the file cannot be opened, is truncated or does not match the
-// expected layout. When the failure happens after the ggml context was created,
-// the context is released again and model.ctx is reset to nullptr.
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
-    printf("%s: loading model from '%s'\n", __func__, fname.c_str());
-
-    auto fin = std::ifstream(fname, std::ios::binary);
-    if (!fin) {
-        fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
-        return false;
-    }
-
-    // verify magic
-    {
-        uint32_t magic = 0; // stays 0 (= invalid) if the read comes up short
-        fin.read((char *) &magic, sizeof(magic));
-        if (magic != 0x67676d6c) {
-            fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
-            return false;
-        }
-    }
-
-    // load hparams
-    {
-        auto & hparams = model.hparams;
-
-        fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
-        fin.read((char *) &hparams.n_ctx,   sizeof(hparams.n_ctx));
-        fin.read((char *) &hparams.n_embd,  sizeof(hparams.n_embd));
-        fin.read((char *) &hparams.n_head,  sizeof(hparams.n_head));
-        fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
-        fin.read((char *) &hparams.f16,     sizeof(hparams.f16));
-
-        printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
-        printf("%s: n_ctx   = %d\n", __func__, hparams.n_ctx);
-        printf("%s: n_embd  = %d\n", __func__, hparams.n_embd);
-        printf("%s: n_head  = %d\n", __func__, hparams.n_head);
-        printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
-        printf("%s: f16     = %d\n", __func__, hparams.f16);
-    }
-
-    // load vocab
-    {
-        int32_t n_vocab = 0;
-        fin.read((char *) &n_vocab, sizeof(n_vocab));
-
-        if (n_vocab != model.hparams.n_vocab) {
-            fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
-                    __func__, fname.c_str(), n_vocab, model.hparams.n_vocab);
-            return false;
-        }
-
-        std::string word;
-        for (int i = 0; i < n_vocab; i++) {
-            uint32_t len = 0;
-            fin.read((char *) &len, sizeof(len));
-            if (!fin) {
-                // without this check a truncated file would resize `word` with a garbage length
-                fprintf(stderr, "%s: invalid model file '%s' (truncated vocab)\n", __func__, fname.c_str());
-                return false;
-            }
-
-            word.resize(len);
-            fin.read((char *) word.data(), len);
-
-            vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-        }
-    }
-
-    // for the big tensors, we have the option to store the data in 16-bit floats
-    // in order to save memory and also to speed up the computation
-    const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
-
-    auto & ctx = model.ctx;
-
-    size_t ctx_size = 0;
-
-    {
-        const auto & hparams = model.hparams;
-
-        const int n_embd  = hparams.n_embd;
-        const int n_layer = hparams.n_layer;
-        const int n_ctx   = hparams.n_ctx;
-        const int n_vocab = hparams.n_vocab;
-
-        ctx_size += n_embd*ggml_type_size(GGML_TYPE_F32); // ln_f_g
-        ctx_size += n_embd*ggml_type_size(GGML_TYPE_F32); // ln_f_b
-
-        ctx_size += n_vocab*n_embd*ggml_type_size(wtype);         // wte
-        ctx_size +=   n_ctx*n_embd*ggml_type_size(GGML_TYPE_F32); // wpe
-
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_1_g
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_1_b
-
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_2_g
-        ctx_size += n_layer*(n_embd*ggml_type_size(GGML_TYPE_F32)); // ln_2_b
-
-        ctx_size += n_layer*(3*n_embd*n_embd*ggml_type_size(wtype));         // c_attn_attn_w
-        ctx_size += n_layer*(       3*n_embd*ggml_type_size(GGML_TYPE_F32)); // c_attn_attn_b
-
-        ctx_size += n_layer*(n_embd*n_embd*ggml_type_size(wtype));           // c_attn_proj_w
-        ctx_size += n_layer*(       n_embd*ggml_type_size(GGML_TYPE_F32));   // c_attn_proj_b
-
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_size(wtype));         // c_mlp_fc_w
-        ctx_size += n_layer*(       4*n_embd*ggml_type_size(GGML_TYPE_F32)); // c_mlp_fc_b
-
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_size(wtype));         // c_mlp_proj_w
-        ctx_size += n_layer*(         n_embd*ggml_type_size(GGML_TYPE_F32)); // c_mlp_proj_b
-
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_size(GGML_TYPE_F32); // memory_k
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_size(GGML_TYPE_F32); // memory_v
-
-        ctx_size += (6 + 12*n_layer)*256; // object overhead
-
-        printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
-    }
-
-    // create the ggml context
-    {
-        struct ggml_init_params params;
-        params.mem_size   = ctx_size;
-        params.mem_buffer = NULL;
-
-        model.ctx = ggml_init(params);
-        if (!model.ctx) {
-            fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-            return false;
-        }
-    }
-
-    // from here on every error path has to give the ggml context back,
-    // otherwise the whole weight buffer leaks
-    const auto fail = [&model]() {
-        model.tensors.clear(); // the tensors live inside the context that is about to be freed
-        ggml_free(model.ctx);
-        model.ctx = nullptr;
-        return false;
-    };
-
-    // prepare memory for the weights
-    {
-        const auto & hparams = model.hparams;
-
-        const int n_embd  = hparams.n_embd;
-        const int n_layer = hparams.n_layer;
-        const int n_ctx   = hparams.n_ctx;
-        const int n_vocab = hparams.n_vocab;
-
-        model.layers.resize(n_layer);
-
-        model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-        model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-
-        model.wte = ggml_new_tensor_2d(ctx, wtype,         n_embd, n_vocab);
-        model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx);
-
-        // map by name
-        model.tensors["model/ln_f/g"] = model.ln_f_g;
-        model.tensors["model/ln_f/b"] = model.ln_f_b;
-
-        model.tensors["model/wte"] = model.wte;
-        model.tensors["model/wpe"] = model.wpe;
-
-        for (int i = 0; i < n_layer; ++i) {
-            auto & layer = model.layers[i];
-
-            layer.ln_1_g             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-            layer.ln_1_b             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            layer.ln_2_g             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-            layer.ln_2_b             = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            layer.c_attn_attn_w      = ggml_new_tensor_2d(ctx, wtype,         3*n_embd, n_embd);
-            layer.c_attn_attn_b      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd);
-
-            layer.c_attn_proj_w      = ggml_new_tensor_2d(ctx, wtype,           n_embd, n_embd);
-            layer.c_attn_proj_b      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            layer.c_mlp_fc_w         = ggml_new_tensor_2d(ctx, wtype,         4*n_embd, n_embd);
-            layer.c_mlp_fc_b         = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd);
-
-            layer.c_mlp_proj_w_trans = ggml_new_tensor_2d(ctx, wtype,         4*n_embd, n_embd);
-            layer.c_mlp_proj_b       = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,   n_embd);
-
-            // map by name
-            const std::string prefix = "model/h" + std::to_string(i);
-
-            model.tensors[prefix + "/ln_1/g"]        = layer.ln_1_g;
-            model.tensors[prefix + "/ln_1/b"]        = layer.ln_1_b;
-
-            model.tensors[prefix + "/ln_2/g"]        = layer.ln_2_g;
-            model.tensors[prefix + "/ln_2/b"]        = layer.ln_2_b;
-
-            model.tensors[prefix + "/attn/c_attn/w"] = layer.c_attn_attn_w;
-            model.tensors[prefix + "/attn/c_attn/b"] = layer.c_attn_attn_b;
-
-            model.tensors[prefix + "/attn/c_proj/w"] = layer.c_attn_proj_w;
-            model.tensors[prefix + "/attn/c_proj/b"] = layer.c_attn_proj_b;
-
-            model.tensors[prefix + "/mlp/c_fc/w"]    = layer.c_mlp_fc_w;
-            model.tensors[prefix + "/mlp/c_fc/b"]    = layer.c_mlp_fc_b;
-
-            model.tensors[prefix + "/mlp/c_proj/w"]  = layer.c_mlp_proj_w_trans;
-            model.tensors[prefix + "/mlp/c_proj/b"]  = layer.c_mlp_proj_b;
-        }
-    }
-
-    // key + value memory
-    {
-        const auto & hparams = model.hparams;
-
-        const int n_embd  = hparams.n_embd;
-        const int n_layer = hparams.n_layer;
-        const int n_ctx   = hparams.n_ctx;
-
-        const int n_mem      = n_layer*n_ctx;
-        const int n_elements = n_embd*n_mem;
-
-        model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
-        model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
-
-        const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);
-
-        printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
-    }
-
-    // load weights
-    {
-        size_t total_size = 0;
-
-        while (true) {
-            int32_t n_dims;
-            int32_t length;
-            int32_t ftype;
-
-            fin.read(reinterpret_cast<char *>(&n_dims), sizeof(n_dims));
-            fin.read(reinterpret_cast<char *>(&length), sizeof(length));
-            fin.read(reinterpret_cast<char *>(&ftype),  sizeof(ftype));
-
-            if (fin.eof()) {
-                break;
-            }
-
-            // ne[] below only has room for two dimensions and `length` is used as a
-            // string size, so a corrupt header must be rejected before it is used
-            if (n_dims < 0 || n_dims > 2 || length < 0) {
-                fprintf(stderr, "%s: invalid tensor header in model file (n_dims = %d, name length = %d)\n",
-                        __func__, n_dims, length);
-                return fail();
-            }
-
-            int32_t nelements = 1;
-            int32_t ne[2] = { 1, 1 };
-            for (int i = 0; i < n_dims; ++i) {
-                fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
-                nelements *= ne[i];
-            }
-
-            std::string name(length, 0);
-            fin.read(&name[0], length);
-
-            if (!fin) {
-                fprintf(stderr, "%s: unexpected end of model file while reading a tensor header\n", __func__);
-                return fail();
-            }
-
-            if (model.tensors.find(name.data()) == model.tensors.end()) {
-                fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
-                return fail();
-            }
-
-            auto tensor = model.tensors[name.data()];
-            if (ggml_nelements(tensor) != nelements) {
-                fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
-                return fail();
-            }
-
-            // "got" is what the file says, "expected" is what was allocated above
-            if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
-                        __func__, name.data(), ne[0], ne[1], tensor->ne[0], tensor->ne[1]);
-                return fail();
-            }
-
-            const size_t bpe = (ftype == 0) ? sizeof(float) : sizeof(ggml_fp16_t);
-
-            if (nelements*bpe != ggml_nbytes(tensor)) {
-                fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
-                        __func__, name.data(), nelements*bpe, ggml_nbytes(tensor));
-                return fail();
-            }
-
-            fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
-            if (!fin) {
-                fprintf(stderr, "%s: unexpected end of model file while reading tensor '%s'\n", __func__, name.data());
-                return fail();
-            }
-
-            total_size += ggml_nbytes(tensor);
-        }
-
-        printf("%s: model size  = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
-    }
-
-    fin.close();
-
-    return true;
-}
-
-// evaluate the transformer
-//
-//   - model:         the model
-//   - n_threads:     number of threads to use
-//   - n_past:        number of tokens already stored in the key/value memory
-//   - embd_inp:      ids of the new tokens to process (must not be empty)
-//   - embd_w:        receives the soft-max output (n_vocab values) for the last token of embd_inp
-//   - mem_per_token: estimate of the scratch memory needed per token; computed on the first
-//                    call if it is 0, used to grow the scratch buffer otherwise
-//
-// Returns false if the input is empty, does not fit into the context, or the
-// scratch buffer / ggml context could not be set up.
-bool gpt2_eval(
-        const gpt2_model & model,
-        const int n_threads,
-        const int n_past,
-        const std::vector<gpt_vocab::id> & embd_inp,
-              std::vector<float>         & embd_w,
-              size_t                     & mem_per_token) {
-    const int N = embd_inp.size();
-
-    const auto & hparams = model.hparams;
-
-    const int n_embd  = hparams.n_embd;
-    const int n_layer = hparams.n_layer;
-    const int n_ctx   = hparams.n_ctx;
-    const int n_head  = hparams.n_head;
-    const int n_vocab = hparams.n_vocab;
-
-    // with N == 0 the code below would read row N-1 of the output and divide by N
-    if (N == 0) {
-        fprintf(stderr, "%s: no input tokens\n", __func__);
-        return false;
-    }
-
-    // the key/value memory only has n_ctx slots per layer
-    if (n_past + N > n_ctx) {
-        fprintf(stderr, "%s: context overflow: n_past (%d) + N (%d) > n_ctx (%d)\n", __func__, n_past, N, n_ctx);
-        return false;
-    }
-
-    static size_t buf_size = 5640ull*1024*1024;
-    static void * buf = malloc(buf_size);
-
-    if (mem_per_token > 0 && mem_per_token*N > buf_size) {
-        const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
-        printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
-
-        // reallocate - keep the old buffer and its size if this fails, so that the
-        // static state stays consistent for later calls
-        void * buf_new = realloc(buf, buf_size_new);
-        if (buf_new == nullptr) {
-            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size_new);
-            return false;
-        }
-
-        buf      = buf_new;
-        buf_size = buf_size_new;
-    }
-
-    struct ggml_init_params params;
-    params.mem_size   = buf_size;
-    params.mem_buffer = buf;
-
-    struct ggml_context * ctx0 = ggml_init(params);
-    if (ctx0 == nullptr) {
-        fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-        return false;
-    }
-
-    struct ggml_cgraph gf = { };
-    gf.n_threads = n_threads;
-
-    struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
-    memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
-
-    struct ggml_tensor * position = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
-    for (int i = 0; i < N; ++i) {
-        ((int32_t *) position->data)[i] = n_past + i;
-    }
-
-    // wte + wpe
-    struct ggml_tensor * inpL =
-        ggml_add(ctx0,
-                ggml_get_rows(ctx0, model.wte, embd),
-                ggml_get_rows(ctx0, model.wpe, position));
-
-    for (int il = 0; il < n_layer; ++il) {
-        struct ggml_tensor * cur;
-
-        // norm
-        {
-            // [ 768, N]
-            cur = ggml_norm(ctx0, inpL);
-
-            // cur = ln_1_g*cur + ln_1_b
-            // [ 768, N]
-            cur = ggml_add(ctx0,
-                    ggml_mul(ctx0,
-                        ggml_repeat(ctx0, model.layers[il].ln_1_g, cur),
-                        cur),
-                    ggml_repeat(ctx0, model.layers[il].ln_1_b, cur));
-        }
-
-        // attn
-        // [2304, 768] - model.layers[il].c_attn_attn_w
-        // [2304,   1] - model.layers[il].c_attn_attn_b
-        // [ 768,   N] - cur (in)
-        // [2304,   N] - cur (out)
-        //
-        // cur = attn_w*cur + attn_b
-        // [2304, N]
-        {
-            cur = ggml_mul_mat(ctx0,
-                    ggml_transpose(ctx0, model.layers[il].c_attn_attn_w),
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur),
-                    cur);
-        }
-
-        // self-attention
-        {
-            struct ggml_tensor * Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 0*sizeof(float)*n_embd);
-            struct ggml_tensor * Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 1*sizeof(float)*n_embd);
-            struct ggml_tensor * Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 2*sizeof(float)*n_embd);
-
-            // store key and value to memory
-            if (N >= 1) {
-                struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past));
-                struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past));
-
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
-            }
-
-            // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
-            // [64, N, 12]
-            struct ggml_tensor * Q =
-                ggml_permute(ctx0,
-                        ggml_cpy(ctx0,
-                            Qcur,
-                            ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd/n_head, n_head, N)),
-                        0, 2, 1, 3);
-
-            // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3)
-            // [64, n_past + N, 12]
-            struct ggml_tensor * K =
-                ggml_permute(ctx0,
-                        ggml_reshape_3d(ctx0,
-                            ggml_view_1d(ctx0, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd),
-                            n_embd/n_head, n_head, n_past + N),
-                        0, 2, 1, 3);
-
-            // K * Q
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
-
-            // KQ_scaled = KQ / sqrt(n_embd/n_head)
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ_scaled =
-                ggml_scale(ctx0,
-                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head))
-                        );
-
-            // KQ_masked = mask_past(KQ_scaled)
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past);
-
-            // KQ = soft_max(KQ_masked)
-            // [n_past + N, N, 12]
-            struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked);
-
-            // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous()
-            // [n_past + N, 64, 12]
-            struct ggml_tensor * V_trans =
-                ggml_permute(ctx0,
-                        ggml_reshape_3d(ctx0,
-                            ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd),
-                            n_embd/n_head, n_head, n_past + N),
-                        1, 2, 0, 3);
-
-            // KQV = transpose(V) * KQ_soft_max
-            // [64, N, 12]
-            struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max);
-
-            // KQV_merged = KQV.permute(0, 2, 1, 3)
-            // [64, 12, N]
-            struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-
-            // cur = KQV_merged.contiguous().view(n_embd, N)
-            // [768, N]
-            cur = ggml_cpy(ctx0,
-                    KQV_merged,
-                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
-        }
-
-        // projection
-        // [ 768, 768] - model.layers[il].c_attn_proj_w
-        // [ 768,   1] - model.layers[il].c_attn_proj_b
-        // [ 768,   N] - cur (in)
-        // [ 768,   N] - cur (out)
-        //
-        // cur = proj_w*cur + proj_b
-        // [768, N]
-        {
-            cur = ggml_mul_mat(ctx0,
-                    ggml_transpose(ctx0, model.layers[il].c_attn_proj_w),
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur),
-                    cur);
-        }
-
-        // add the input
-        cur = ggml_add(ctx0, cur, inpL);
-
-        struct ggml_tensor * inpFF = cur;
-
-        // feed-forward network
-        {
-            // norm
-            {
-                cur = ggml_norm(ctx0, inpFF);
-
-                // cur = ln_2_g*cur + ln_2_b
-                // [ 768, N]
-                cur = ggml_add(ctx0,
-                        ggml_mul(ctx0,
-                            ggml_repeat(ctx0, model.layers[il].ln_2_g, cur),
-                            cur),
-                        ggml_repeat(ctx0, model.layers[il].ln_2_b, cur));
-            }
-
-            // fully connected
-            // [3072, 768] - model.layers[il].c_mlp_fc_w
-            // [3072,   1] - model.layers[il].c_mlp_fc_b
-            // [ 768,   N] - cur (in)
-            // [3072,   N] - cur (out)
-            //
-            // cur = fc_w*cur + fc_b
-            // [3072, N]
-            cur = ggml_mul_mat(ctx0,
-                    ggml_transpose(ctx0, model.layers[il].c_mlp_fc_w),
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur),
-                    cur);
-
-            // GELU activation
-            // [3072, N]
-            cur = ggml_gelu(ctx0, cur);
-
-            // projection
-            // [ 768, 3072] - model.layers[il].c_mlp_proj_w
-            // [ 768,    1] - model.layers[il].c_mlp_proj_b
-            // [3072,    N] - cur (in)
-            // [ 768,    N] - cur (out)
-            //
-            // cur = proj_w*cur + proj_b
-            // [768, N]
-            cur = ggml_mul_mat(ctx0,
-                    model.layers[il].c_mlp_proj_w_trans,
-                    cur);
-
-            cur = ggml_add(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur),
-                    cur);
-        }
-
-        // input for next layer
-        inpL = ggml_add(ctx0, cur, inpFF);
-    }
-
-    // norm
-    {
-        // [ 768, N]
-        inpL = ggml_norm(ctx0, inpL);
-
-        // inpL = ln_f_g*inpL + ln_f_b
-        // [ 768, N]
-        inpL = ggml_add(ctx0,
-                ggml_mul(ctx0,
-                    ggml_repeat(ctx0, model.ln_f_g, inpL),
-                    inpL),
-                ggml_repeat(ctx0, model.ln_f_b, inpL));
-    }
-
-    // inpL = WTE * inpL
-    // [ 768, 50257] - model.wte
-    // [ 768, N]     - inpL
-    inpL = ggml_mul_mat(ctx0, model.wte, inpL);
-
-    // logits -> probs
-    inpL = ggml_soft_max(ctx0, inpL);
-
-    // run the computation
-    ggml_build_forward_expand(&gf, inpL);
-    ggml_graph_compute       (ctx0, &gf);
-
-    // return result for just the last token
-    embd_w.resize(n_vocab);
-    memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
-
-    if (mem_per_token == 0) {
-        mem_per_token = ggml_used_mem(ctx0)/N;
-    }
-
-    ggml_free(ctx0);
-
-    return true;
-}
-
-/////////////////////////////// GPT-2 END ////////////////////////////////
-
-constexpr int N_THREAD = 8; // upper bound for the default number of threads used by gpt2_context
-
-struct gpt2_context { // everything gpt2_gen_text() needs: prompt, RNG, vocab, weights and sampling settings
-    std::string prompt_base = R"(Hello, how are you?
-I'm fine, thanks. How are you?
-Thanks, I'm fine too. What are you doing?
-I'm just sitting here.
-It's a lovely day, isn't it?
-Yes, it is. I love the weather this time of year.
-I wish it would rain a little bit.
-Me too.
-)";
-
-    std::mt19937 rng; // seeded in gpt2_init()
-
-    gpt_vocab vocab;
-    gpt2_model model;
-
-    int32_t n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency()); // passed to gpt2_eval()
-
-    // sampling parameters - forwarded to gpt_sample_top_k_top_p()
-    int32_t top_k = 20;
-    float   top_p = 0.98f;
-    float   temp  = 1.0f;
-};
-
-// Create a GPT-2 context and load the model stored at path_model.
-//
-// Returns the new context, which must be released with gpt2_free(), or nullptr
-// if path_model is null or the model could not be loaded.
-struct gpt2_context * gpt2_init(const char * path_model) {
-    if (path_model == nullptr) {
-        fprintf(stderr, "%s: no model path provided\n", __func__);
-        return nullptr;
-    }
-
-    gpt2_context * ctx = new gpt2_context;
-
-    ctx->rng = std::mt19937(time(NULL));
-
-    // load the model
-    {
-        const int64_t t_start_us = ggml_time_us();
-
-        if (!gpt2_model_load(path_model, ctx->model, ctx->vocab)) {
-            // report the path that was actually used and do not leak the context
-            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
-            delete ctx;
-            return nullptr;
-        }
-
-        const int64_t t_load_us = ggml_time_us() - t_start_us;
-
-        printf("gpt-2: model loaded in %d ms\n", (int) (t_load_us/1000));
-    }
-
-    return ctx;
-}
-
-// Release a context created by gpt2_init(). Passing nullptr is a no-op.
-void gpt2_free(struct gpt2_context * ctx) {
-    if (ctx == nullptr) {
-        return;
-    }
-
-    // the weights and the key/value memory live in the model's ggml context,
-    // which is not released by the gpt2_context destructor
-    if (ctx->model.ctx != nullptr) {
-        ggml_free(ctx->model.ctx);
-        ctx->model.ctx = nullptr;
-    }
-
-    delete ctx;
-}
-
-// Return the base prompt stored in the context as a C string.
-// The pointer refers to ctx->prompt_base, so it is invalidated when the prompt is changed.
-const char * gpt2_get_prompt(struct gpt2_context * ctx) {
-    const std::string & prompt = ctx->prompt_base;
-
-    return prompt.c_str();
-}
-
-// Replace the base prompt stored in the context with a copy of `prompt`.
-// A null `prompt` clears the base prompt (assigning a null pointer to a
-// std::string would be undefined behaviour).
-void gpt2_set_prompt(struct gpt2_context * ctx, const char * prompt) {
-    ctx->prompt_base = prompt != nullptr ? prompt : "";
-}
-
-// Tokenize `text` with the vocabulary stored in the context.
-std::vector<gpt_vocab::id> gpt2_tokenize(const gpt2_context * ctx, const char * text) {
-    const std::string input(text);
-
-    return ::gpt_tokenize(ctx->vocab, input);
-}
-
-// Generate a continuation of `text`.
-//
-//   - ctx:        context with the loaded model, vocabulary, RNG and sampling settings
-//   - text:       the prompt to continue
-//   - max_tokens: upper bound for the number of generated tokens; the number is further
-//                 limited by the space left in the model context
-//
-// Generation stops after the end-of-text token (id 50256) or a ".", "!" or "?" token;
-// the stopping token is part of the result. Returns an empty string if the prompt
-// produces no tokens, no token can be generated or the evaluation fails.
-std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens) {
-    int n_past = 0;
-
-    std::vector<float> embd_w;
-
-    // tokenize the prompt - the first evaluation processes the whole prompt at once
-    std::vector<gpt_vocab::id> embd = ::gpt2_tokenize(ctx, text);
-
-    const int n_prompt = (int) embd.size();
-
-    std::string result;
-
-    // without any prompt token nothing is evaluated, so embd_w would stay empty and
-    // the sampler would be handed a pointer far outside of the buffer
-    if (n_prompt == 0) {
-        return result;
-    }
-
-    // kept signed on purpose: a prompt longer than the context (or a negative
-    // max_tokens) yields a non-positive value and therefore no iterations
-    const int n_predict = std::min(max_tokens, ctx->model.hparams.n_ctx - n_prompt);
-
-    size_t mem_per_token = 3000000;
-
-    for (int i = 0; i < n_predict; i++) {
-        // predict
-        if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
-            printf("gpt-2: failed to generate text\n");
-            return "";
-        }
-
-        n_past += embd.size();
-        embd.clear();
-
-        // sample next token
-        const int n_vocab = ctx->model.hparams.n_vocab;
-
-        const gpt_vocab::id id = gpt_sample_top_k_top_p(
-                ctx->vocab,
-                embd_w.data() + (embd_w.size() - n_vocab),
-                ctx->top_k, ctx->top_p, ctx->temp, ctx->rng);
-
-        // add it to the context - it is the only input of the next evaluation
-        embd.push_back(id);
-
-        const std::string & token = ctx->vocab.id_to_token[id];
-
-        result += token;
-
-        // end of text token
-        if (id == 50256 || token == "." || token == "!" || token == "?") {
-            break;
-        }
-    }
-
-    return result;
-}
--- a/examples/talk/gpt-2.h
+++ b/examples/talk/gpt-2.h
@ -1,27 +0,0 @@
-#pragma once
-
-// TODO: Change to C-style API and move to ./examples for easy reuse.
-
-#include <vector>
-#include <map>
-#include <string>
-
-struct gpt_vocab {
-    using id    = int32_t;
-    using token = std::string;
-
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-};
-
-struct gpt2_context;
-
-struct gpt2_context * gpt2_init(const char * path_model);
-void gpt2_free(struct gpt2_context * ctx);
-
-const char * gpt2_get_prompt(struct gpt2_context * ctx);
-void gpt2_set_prompt(struct gpt2_context * ctx, const char * prompt);
-
-std::vector<gpt_vocab::id> gpt2_tokenize(const gpt2_context * ctx, const char * text);
-
-std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens);
--- a/examples/talk/speak.sh
+++ b/examples/talk/speak.sh
@ -1,17 +0,0 @@
-#!/bin/bash
-
-# Usage:
-#  speak.sh <voice_id> <text-to-speak>
-
-# espeak
-# Mac OS: brew install espeak
-# Linux: apt-get install espeak
-#
-espeak -v en-us+m$1 -s 175 -p 50 -a 200 -g 5 -k 5 "$2"
-
-# Eleven Labs
-#
-#wd=$(dirname $0)
-#script=$wd/eleven-labs.py
-#python3 $script $1 "$2"
-#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3
--- a/examples/talk/talk.cpp
+++ b/examples/talk/talk.cpp
@ -1,733 +0,0 @@
-// Talk with AI
-//
-
-#include "whisper.h"
-#include "gpt-2.h"
-
-#include <SDL.h>
-#include <SDL_audio.h>
-
-#include <cassert>
-#include <cstdio>
-#include <fstream>
-#include <mutex>
-#include <regex>
-#include <string>
-#include <thread>
-#include <vector>
-#include <regex>
-
-// command-line parameters
-struct whisper_params {
-    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t voice_ms   = 10000;
-    int32_t capture_id = -1;
-    int32_t max_tokens = 32;
-    int32_t audio_ctx  = 0;
-
-    float vad_thold    = 0.6f;
-    float freq_thold   = 100.0f;
-
-    bool speed_up      = false;
-    bool translate     = false;
-    bool print_special = false;
-    bool print_energy  = false;
-    bool no_timestamps = true;
-
-    std::string person    = "Santa";
-    std::string language  = "en";
-    std::string model_wsp = "models/ggml-base.en.bin";
-    std::string model_gpt = "models/ggml-gpt-2-117M.bin";
-    std::string speak     = "./examples/talk/speak.sh";
-    std::string fname_out = "";
-};
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
-
-bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
-    for (int i = 1; i < argc; i++) {
-        std::string arg = argv[i];
-
-        if (arg == "-h" || arg == "--help") {
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-        else if (arg == "-t"   || arg == "--threads")       { params.n_threads     = std::stoi(argv[++i]); }
-        else if (arg == "-vms" || arg == "--voice-ms")      { params.voice_ms      = std::stoi(argv[++i]); }
-        else if (arg == "-c"   || arg == "--capture")       { params.capture_id    = std::stoi(argv[++i]); }
-        else if (arg == "-mt"  || arg == "--max-tokens")    { params.max_tokens    = std::stoi(argv[++i]); }
-        else if (arg == "-ac"  || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(argv[++i]); }
-        else if (arg == "-vth" || arg == "--vad-thold")     { params.vad_thold     = std::stof(argv[++i]); }
-        else if (arg == "-fth" || arg == "--freq-thold")    { params.freq_thold    = std::stof(argv[++i]); }
-        else if (arg == "-su"  || arg == "--speed-up")      { params.speed_up      = true; }
-        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
-        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
-        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
-        else if (arg == "-p"   || arg == "--person")        { params.person        = argv[++i]; }
-        else if (arg == "-l"   || arg == "--language")      { params.language      = argv[++i]; }
-        else if (arg == "-mw"  || arg == "--model-whisper") { params.model_wsp     = argv[++i]; }
-        else if (arg == "-mg"  || arg == "--model-gpt")     { params.model_gpt     = argv[++i]; }
-        else if (arg == "-s"   || arg == "--speak")         { params.speak         = argv[++i]; }
-        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = argv[++i]; }
-        else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-    }
-
-    return true;
-}
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options]\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help          [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N     [%-7d] number of threads to use during computation\n", params.n_threads);
-    fprintf(stderr, "  -vms N,   --voice-ms N    [%-7d] voice duration in milliseconds\n",              params.voice_ms);
-    fprintf(stderr, "  -c ID,    --capture ID    [%-7d] capture device ID\n",                           params.capture_id);
-    fprintf(stderr, "  -mt N,    --max-tokens N  [%-7d] maximum number of tokens per audio chunk\n",    params.max_tokens);
-    fprintf(stderr, "  -ac N,    --audio-ctx N   [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
-    fprintf(stderr, "  -vth N,   --vad-thold N   [%-7.2f] voice activity detection threshold\n",        params.vad_thold);
-    fprintf(stderr, "  -fth N,   --freq-thold N  [%-7.2f] high-pass frequency cutoff\n",                params.freq_thold);
-    fprintf(stderr, "  -su,      --speed-up      [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
-    fprintf(stderr, "  -tr,      --translate     [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
-    fprintf(stderr, "  -ps,      --print-special [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
-    fprintf(stderr, "  -pe,      --print-energy  [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
-    fprintf(stderr, "  -p NAME,  --person NAME   [%-7s] person name (for prompt selection)\n",          params.person.c_str());
-    fprintf(stderr, "  -l LANG,  --language LANG [%-7s] spoken language\n",                             params.language.c_str());
-    fprintf(stderr, "  -mw FILE, --model-whisper [%-7s] whisper model file\n",                          params.model_wsp.c_str());
-    fprintf(stderr, "  -mg FILE, --model-gpt     [%-7s] gpt model file\n",                              params.model_gpt.c_str());
-    fprintf(stderr, "  -s FILE,  --speak TEXT    [%-7s] command for TTS\n",                             params.speak.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] text output file name\n",                       params.fname_out.c_str());
-    fprintf(stderr, "\n");
-}
-
-//
-// SDL Audio capture
-//
-
-class audio_async {
-public:
-    audio_async(int len_ms);
-    ~audio_async();
-
-    bool init(int capture_id, int sample_rate);
-
-    // start capturing audio via the provided SDL callback
-    // keep last len_ms seconds of audio in a circular buffer
-    bool resume();
-    bool pause();
-    bool clear();
-
-    // callback to be called by SDL
-    void callback(uint8_t * stream, int len);
-
-    // get audio data from the circular buffer
-    void get(int ms, std::vector<float> & audio);
-
-private:
-    SDL_AudioDeviceID m_dev_id_in = 0;
-
-    int m_len_ms = 0;
-    int m_sample_rate = 0;
-
-    bool       m_running = false;
-    std::mutex m_mutex;
-
-    std::vector<float> m_audio;
-    std::vector<float> m_audio_new;
-    size_t             m_audio_pos = 0;
-    size_t             m_audio_len = 0;
-};
-
-audio_async::audio_async(int len_ms) {
-    m_len_ms = len_ms;
-}
-
-audio_async::~audio_async() {
-    if (m_dev_id_in) {
-        SDL_CloseAudioDevice(m_dev_id_in);
-    }
-}
-
-bool audio_async::init(int capture_id, int sample_rate) {
-    SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
-
-    if (SDL_Init(SDL_INIT_AUDIO) < 0) {
-        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
-        return false;
-    }
-
-    SDL_SetHintWithPriority(SDL_HINT_AUDIO_RESAMPLING_MODE, "medium", SDL_HINT_OVERRIDE);
-
-    {
-        int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
-        fprintf(stderr, "%s: found %d capture devices:\n", __func__, nDevices);
-        for (int i = 0; i < nDevices; i++) {
-            fprintf(stderr, "%s:    - Capture device #%d: '%s'\n", __func__, i, SDL_GetAudioDeviceName(i, SDL_TRUE));
-        }
-    }
-
-    SDL_AudioSpec capture_spec_requested;
-    SDL_AudioSpec capture_spec_obtained;
-
-    SDL_zero(capture_spec_requested);
-    SDL_zero(capture_spec_obtained);
-
-    capture_spec_requested.freq     = sample_rate;
-    capture_spec_requested.format   = AUDIO_F32;
-    capture_spec_requested.channels = 1;
-    capture_spec_requested.samples  = 1024;
-    capture_spec_requested.callback = [](void * userdata, uint8_t * stream, int len) {
-        audio_async * audio = (audio_async *) userdata;
-        audio->callback(stream, len);
-    };
-    capture_spec_requested.userdata = this;
-
-    if (capture_id >= 0) {
-        fprintf(stderr, "%s: attempt to open capture device %d : '%s' ...\n", __func__, capture_id, SDL_GetAudioDeviceName(capture_id, SDL_TRUE));
-        m_dev_id_in = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(capture_id, SDL_TRUE), SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
-    } else {
-        fprintf(stderr, "%s: attempt to open default capture device ...\n", __func__);
-        m_dev_id_in = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
-    }
-
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: couldn't open an audio device for capture: %s!\n", __func__, SDL_GetError());
-        m_dev_id_in = 0;
-
-        return false;
-    } else {
-        fprintf(stderr, "%s: obtained spec for input device (SDL Id = %d):\n", __func__, m_dev_id_in);
-        fprintf(stderr, "%s:     - sample rate:       %d\n",                   __func__, capture_spec_obtained.freq);
-        fprintf(stderr, "%s:     - format:            %d (required: %d)\n",    __func__, capture_spec_obtained.format,
-                capture_spec_requested.format);
-        fprintf(stderr, "%s:     - channels:          %d (required: %d)\n",    __func__, capture_spec_obtained.channels,
-                capture_spec_requested.channels);
-        fprintf(stderr, "%s:     - samples per frame: %d\n",                   __func__, capture_spec_obtained.samples);
-        fprintf(stderr, "\n");
-    }
-
-    m_sample_rate = capture_spec_obtained.freq;
-
-    m_audio.resize((m_sample_rate*m_len_ms)/1000);
-
-    return true;
-}
-
-bool audio_async::resume() {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to resume!\n", __func__);
-        return false;
-    }
-
-    if (m_running) {
-        fprintf(stderr, "%s: already running!\n", __func__);
-        return false;
-    }
-
-    SDL_PauseAudioDevice(m_dev_id_in, 0);
-
-    m_running = true;
-
-    return true;
-}
-
-bool audio_async::pause() {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to pause!\n", __func__);
-        return false;
-    }
-
-    if (!m_running) {
-        fprintf(stderr, "%s: already paused!\n", __func__);
-        return false;
-    }
-
-    SDL_PauseAudioDevice(m_dev_id_in, 1);
-
-    m_running = false;
-
-    return true;
-}
-
-bool audio_async::clear() {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to clear!\n", __func__);
-        return false;
-    }
-
-    if (!m_running) {
-        fprintf(stderr, "%s: not running!\n", __func__);
-        return false;
-    }
-
-    {
-        std::lock_guard<std::mutex> lock(m_mutex);
-
-        m_audio_pos = 0;
-        m_audio_len = 0;
-    }
-
-    return true;
-}
-
-// callback to be called by SDL
-void audio_async::callback(uint8_t * stream, int len) {
-    if (!m_running) {
-        return;
-    }
-
-    const size_t n_samples = len / sizeof(float);
-
-    m_audio_new.resize(n_samples);
-    memcpy(m_audio_new.data(), stream, n_samples * sizeof(float));
-
-    //fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);
-
-    {
-        std::lock_guard<std::mutex> lock(m_mutex);
-
-        if (m_audio_pos + n_samples > m_audio.size()) {
-            const size_t n0 = m_audio.size() - m_audio_pos;
-
-            memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
-            memcpy(&m_audio[0], &stream[n0], (n_samples - n0) * sizeof(float));
-
-            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
-            m_audio_len = m_audio.size();
-        } else {
-            memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
-
-            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
-            m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
-        }
-    }
-}
-
-void audio_async::get(int ms, std::vector<float> & result) {
-    if (!m_dev_id_in) {
-        fprintf(stderr, "%s: no audio device to get audio from!\n", __func__);
-        return;
-    }
-
-    if (!m_running) {
-        fprintf(stderr, "%s: not running!\n", __func__);
-        return;
-    }
-
-    result.clear();
-
-    {
-        std::lock_guard<std::mutex> lock(m_mutex);
-
-        if (ms <= 0) {
-            ms = m_len_ms;
-        }
-
-        size_t n_samples = (m_sample_rate * ms) / 1000;
-        if (n_samples > m_audio_len) {
-            n_samples = m_audio_len;
-        }
-
-        result.resize(n_samples);
-
-        int s0 = m_audio_pos - n_samples;
-        if (s0 < 0) {
-            s0 += m_audio.size();
-        }
-
-        if (s0 + n_samples > m_audio.size()) {
-            const size_t n0 = m_audio.size() - s0;
-
-            memcpy(result.data(), &m_audio[s0], n0 * sizeof(float));
-            memcpy(&result[n0], &m_audio[0], (n_samples - n0) * sizeof(float));
-        } else {
-            memcpy(result.data(), &m_audio[s0], n_samples * sizeof(float));
-        }
-    }
-}
-
-///////////////////////////
-
-std::string trim(const std::string & s) {
-    std::regex e("^\\s+|\\s+$");
-    return std::regex_replace(s, e, "");
-}
-
-std::string replace(const std::string & s, const std::string & from, const std::string & to) {
-    std::string result = s;
-    size_t pos = 0;
-    while ((pos = result.find(from, pos)) != std::string::npos) {
-        result.replace(pos, from.length(), to);
-        pos += to.length();
-    }
-    return result;
-}
-
-void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
-    const float rc = 1.0f / (2.0f * M_PI * cutoff);
-    const float dt = 1.0f / sample_rate;
-    const float alpha = dt / (rc + dt);
-
-    float y = data[0];
-
-    for (size_t i = 1; i < data.size(); i++) {
-        y = alpha * (y + data[i] - data[i - 1]);
-        data[i] = y;
-    }
-}
-
-bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
-    const int n_samples      = pcmf32.size();
-    const int n_samples_last = (sample_rate * last_ms) / 1000;
-
-    if (n_samples_last >= n_samples) {
-        // not enough samples - assume no speech
-        return false;
-    }
-
-    if (freq_thold > 0.0f) {
-        high_pass_filter(pcmf32, freq_thold, sample_rate);
-    }
-
-    float energy_all  = 0.0f;
-    float energy_last = 0.0f;
-
-    for (size_t i = 0; i < n_samples; i++) {
-        energy_all += fabsf(pcmf32[i]);
-
-        if (i >= n_samples - n_samples_last) {
-            energy_last += fabsf(pcmf32[i]);
-        }
-    }
-
-    energy_all  /= n_samples;
-    energy_last /= n_samples_last;
-
-    if (verbose) {
-        fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
-    }
-
-    if (energy_last > vad_thold*energy_all) {
-        return false;
-    }
-
-    return true;
-}
-
-std::string transcribe(whisper_context * ctx, const whisper_params & params, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
-    const auto t_start = std::chrono::high_resolution_clock::now();
-
-    prob = 0.0f;
-    t_ms = 0;
-
-    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-    wparams.print_progress   = false;
-    wparams.print_special    = params.print_special;
-    wparams.print_realtime   = false;
-    wparams.print_timestamps = !params.no_timestamps;
-    wparams.translate        = params.translate;
-    wparams.no_context       = true;
-    wparams.single_segment   = true;
-    wparams.max_tokens       = params.max_tokens;
-    wparams.language         = params.language.c_str();
-    wparams.n_threads        = params.n_threads;
-
-    wparams.audio_ctx        = params.audio_ctx;
-    wparams.speed_up         = params.speed_up;
-
-    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
-        return "";
-    }
-
-    int prob_n = 0;
-    std::string result;
-
-    const int n_segments = whisper_full_n_segments(ctx);
-    for (int i = 0; i < n_segments; ++i) {
-        const char * text = whisper_full_get_segment_text(ctx, i);
-
-        result += text;
-
-        const int n_tokens = whisper_full_n_tokens(ctx, i);
-        for (int j = 0; j < n_tokens; ++j) {
-            const auto token = whisper_full_get_token_data(ctx, i, j);
-
-            prob += token.p;
-            ++prob_n;
-        }
-    }
-
-    if (prob_n > 0) {
-        prob /= prob_n;
-    }
-
-    const auto t_end = std::chrono::high_resolution_clock::now();
-    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
-
-    return result;
-}
-
-// compute similarity between two strings using Levenshtein distance
-float similarity(const std::string & s0, const std::string & s1) {
-    const size_t len0 = s0.size() + 1;
-    const size_t len1 = s1.size() + 1;
-
-    std::vector<int> col(len1, 0);
-    std::vector<int> prevCol(len1, 0);
-
-    for (size_t i = 0; i < len1; i++) {
-        prevCol[i] = i;
-    }
-
-    for (size_t i = 0; i < len0; i++) {
-        col[0] = i;
-        for (size_t j = 1; j < len1; j++) {
-            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
-        }
-        col.swap(prevCol);
-    }
-
-    const float dist = prevCol[len1 - 1];
-
-    return 1.0f - (dist / std::max(s0.size(), s1.size()));
-}
-
-// generated with ChatGPT
-std::map<std::string, std::string> k_prompts = {
-    { "Santa",
-R"(Kid: Hi Santa! Are you real?
-Santa: Of course I am, my dear! Ho ho ho!
-Kid: Can you please bring me a new toy for Christmas?
-Santa: I'll see what I can do, but you have to make sure to be a good boy or girl and listen to your parents.
-Kid: I will, Santa! Thank you!
-Santa: You're welcome, little one. Merry Christmas! Ho ho ho!
-Kid: Can you tell me how you deliver all the presents to all the kids in the world in one night?
-Santa: It's a secret, but I have a lot of help from my elves and my magical sleigh. And I have a special route that I follow to make sure I visit every child.
-Kid: Wow, that's amazing! Can I please have a ride in your sleigh sometime?
-Santa: I'm sorry, but only good boys and girls get to ride in my sleigh.
-)" },
-    { "Kid",
-R"(Kid: Hi Santa! Are you real?
-Santa: Of course I am, my dear! Ho ho ho!
-Kid: Can you please bring me a new toy for Christmas?
-Santa: I'll see what I can do, but you have to make sure to be a good boy or girl and listen to your parents.
-Kid: I will, Santa! Thank you!
-Kid: Can you tell me how you deliver all the presents to all the kids in the world in one night?
-Santa: It's a secret, but I have a lot of help from my elves and my magical sleigh. And I have a special route that I follow to make sure I visit every child.
-Kid: Wow, that's amazing! Can I please have a ride in your sleigh sometime?
-)" },
-};
-
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (whisper_lang_id(params.language.c_str()) == -1) {
-        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
-        whisper_print_usage(argc, argv, params);
-        exit(0);
-    }
-
-    // whisper init
-
-    struct whisper_context * ctx_wsp = whisper_init(params.model_wsp.c_str());
-
-    // gpt init
-
-    struct gpt2_context * ctx_gpt = gpt2_init(params.model_gpt.c_str());
-
-    // print some info about the processing
-    {
-        fprintf(stderr, "\n");
-        if (!whisper_is_multilingual(ctx_wsp)) {
-            if (params.language != "en" || params.translate) {
-                params.language = "en";
-                params.translate = false;
-                fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
-            }
-        }
-        fprintf(stderr, "%s: processing, %d threads, lang = %s, task = %s, timestamps = %d ...\n",
-                __func__,
-                params.n_threads,
-                params.language.c_str(),
-                params.translate ? "translate" : "transcribe",
-                params.no_timestamps ? 0 : 1);
-
-        fprintf(stderr, "\n");
-    }
-
-
-    // init audio
-
-    audio_async audio(30*1000);
-    if (!audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
-        fprintf(stderr, "%s: audio.init() failed!\n", __func__);
-        return 1;
-    }
-
-    audio.resume();
-
-    int n_iter = 0;
-
-    bool is_running  = true;
-    bool force_speak = params.person == "Kid";
-
-    float prob0 = 0.0f;
-    float prob  = 0.0f;
-
-    std::vector<float> pcmf32_cur;
-    std::vector<float> pcmf32_prompt;
-
-    if (k_prompts.find(params.person) == k_prompts.end()) {
-        fprintf(stderr, "%s: unknown person '%s'\n", __func__, params.person.c_str());
-        return 1;
-    }
-
-    gpt2_set_prompt(ctx_gpt, k_prompts.at(params.person).c_str());
-
-    const std::string person_other = params.person == "Santa" ? "Kid" : "Santa";
-    const int voice_id = params.person == "Santa" ? 5 : 2;
-
-    fprintf(stderr, "gpt-2: prompt_base:\n");
-    fprintf(stderr, "========================\n\n");
-    fprintf(stderr, "%s\n", gpt2_get_prompt(ctx_gpt));
-    fprintf(stderr, "========================\n\n");
-
-    // main loop
-    while (is_running) {
-        // handle Ctrl + C
-        {
-            SDL_Event event;
-            while (SDL_PollEvent(&event)) {
-                switch (event.type) {
-                    case SDL_QUIT:
-                        {
-                            is_running = false;
-                        } break;
-                    default:
-                        break;
-                }
-            }
-
-            if (!is_running) {
-                break;
-            }
-        }
-
-        // delay
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-        int64_t t_ms = 0;
-
-        {
-            audio.get(2000, pcmf32_cur);
-
-            if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1250, params.vad_thold, params.freq_thold, params.print_energy) || force_speak) {
-                fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
-
-                audio.get(params.voice_ms, pcmf32_cur);
-
-                std::string text_heard = "Hey little one, what do you want for Christmas?";
-                if (!force_speak) {
-                    text_heard = ::trim(::transcribe(ctx_wsp, params, pcmf32_cur, prob0, t_ms));
-                }
-
-                force_speak = false;
-
-                // remove text between brackets using regex
-                {
-                    std::regex re("\\[.*?\\]");
-                    text_heard = std::regex_replace(text_heard, re, "");
-                }
-
-                // remove text between brackets using regex
-                {
-                    std::regex re("\\(.*?\\)");
-                    text_heard = std::regex_replace(text_heard, re, "");
-                }
-
-                // remove all characters, except for letters, numbers, punctuation and ':', '\'', '-', ' '
-                text_heard = std::regex_replace(text_heard, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
-
-                // take first line
-                text_heard = text_heard.substr(0, text_heard.find_first_of("\n"));
-
-                // remove leading and trailing whitespace
-                text_heard = std::regex_replace(text_heard, std::regex("^\\s+"), "");
-                text_heard = std::regex_replace(text_heard, std::regex("\\s+$"), "");
-
-                const std::vector<gpt_vocab::id> tokens = gpt2_tokenize(ctx_gpt, text_heard.c_str());
-
-                if (text_heard.empty() || tokens.empty()) {
-                    fprintf(stdout, "%s: Heard nothing, skipping ...\n", __func__);
-                    audio.clear();
-
-                    continue;
-                }
-
-                fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", text_heard.c_str(), "\033[0m", (int) t_ms);
-
-                std::string prompt_base = gpt2_get_prompt(ctx_gpt);
-
-                std::string text_to_speak;
-
-                {
-                    text_heard = person_other + ": " + text_heard;
-
-                    text_to_speak = gpt2_gen_text(ctx_gpt, (prompt_base + text_heard + "\n").c_str(), params.max_tokens);
-                    text_to_speak = std::regex_replace(text_to_speak, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
-                    text_to_speak = text_to_speak.substr(0, text_to_speak.find_first_of("\n"));
-
-                    // remove first 2 lines of base prompt
-                    if (n_iter > 4) {
-                        {
-                            const size_t pos = prompt_base.find_first_of("\n");
-                            if (pos != std::string::npos) {
-                                prompt_base = prompt_base.substr(pos + 1);
-                            }
-                        }
-                        {
-                            const size_t pos = prompt_base.find_first_of("\n");
-                            if (pos != std::string::npos) {
-                                prompt_base = prompt_base.substr(pos + 1);
-                            }
-                        }
-                    }
-
-                    prompt_base += text_heard + "\n" + text_to_speak + "\n";
-                }
-
-                printf("%s\n", text_to_speak.c_str());
-
-                //printf("========================\n");
-                //printf("gpt-2: prompt_base:\n'%s'\n", prompt_base.c_str());
-                //printf("========================\n");
-
-                gpt2_set_prompt(ctx_gpt, prompt_base.c_str());
-
-                text_to_speak = ::replace(text_to_speak, params.person + ": ", "");
-                system((params.speak + " " + std::to_string(voice_id) + " \"" + text_to_speak + "\"").c_str());
-
-                audio.clear();
-
-                ++n_iter;
-            }
-        }
-    }
-
-    audio.pause();
-
-    whisper_print_timings(ctx_wsp);
-    whisper_free(ctx_wsp);
-
-    return 0;
-}
--- a/examples/twitch.sh
+++ b/examples/twitch.sh
@ -1,109 +0,0 @@
-#!/bin/bash
-#
-# Transcribe twitch.tv livestream by feeding audio input to whisper.cpp at regular intervals
-# Thanks to @keyehzy
-# ref: https://github.com/ggerganov/whisper.cpp/issues/209
-#
-# The script currently depends on the third-party tool "streamlink"
-# On Mac OS, you can install it via "brew install streamlink"
-#
-
-set -eo pipefail
-
-step=10
-model=base.en
-threads=4
-
-help()
-{
-    echo "Example program for captioning a livestream from twitch.tv."
-    echo
-    echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
-    echo "options:"
-    echo "-s       Step in seconds (default is $step)."
-    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large' (default is '$model')."
-    echo "-t       Number of threads to use."
-    echo "-h       Print this help page."
-    echo
-}
-
-check_requirements()
-{
-    if ! command -v ./main &>/dev/null; then
-        echo "whisper.cpp main executable is required (make)"
-        exit 1
-    fi
-
-    if ! command -v streamlink &>/dev/null; then
-        echo "streamlink is required (https://streamlink.github.io)"
-        exit 1
-    fi
-
-    if ! command -v ffmpeg &>/dev/null; then
-        echo "ffmpeg is required (https://ffmpeg.org)"
-        exit 1
-    fi
-}
-
-check_requirements
-
-while getopts ":s:m:t:h" option; do
-    case $option in
-	s)
-            step=$OPTARG;;
-	m)
-            model=$OPTARG;;
-	t)
-	    threads=$OPTARG;;
-	h)
-            help
-            exit;;
-	\?)
-	    help
-	    exit;;
-    esac
-done
-
-url=${@:$OPTIND:1}
-
-if [ -z $url ]; then
-    help
-    exit
-fi
-
-echo "Piping from streamlink url=$url model=$model step=$step threads=$threads"
-streamlink $url best -O 2>/dev/null | ffmpeg -loglevel quiet -i - -y -probesize 32 -y -ar 16000 -ac 1 -acodec pcm_s16le /tmp/whisper-live0.wav &
-
-if [ $? -ne 0 ]; then
-    printf "error: ffmpeg failed\n"
-    exit 1
-fi
-
-echo "Buffering stream... (this should take $step seconds)"
-sleep $(($step))
-
-set +e
-
-echo "Starting..."
-
-i=0
-SECONDS=0
-while true
-do
-    err=1
-    while [ $err -ne 0 ]; do
-        if [ $i -gt 0 ]; then
-            ffmpeg -loglevel quiet -v error -noaccurate_seek -i /tmp/whisper-live0.wav -y -ss $(($i*$step-1)).5 -t $step -c copy /tmp/whisper-live.wav 2> /tmp/whisper-live.err
-        else
-            ffmpeg -loglevel quiet -v error -noaccurate_seek -i /tmp/whisper-live0.wav -y -ss $(($i*$step)) -t $step -c copy /tmp/whisper-live.wav 2> /tmp/whisper-live.err
-        fi
-        err=$(cat /tmp/whisper-live.err | wc -l)
-    done
-
-    ./main -t $threads -m ./models/ggml-$model.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
-
-    while [ $SECONDS -lt $((($i+1)*$step)) ]; do
-        sleep 1
-    done
-    ((i=i+1))
-done
--- a/examples/whisper.nvim/README.md
+++ b/examples/whisper.nvim/README.md
@ -1,92 +0,0 @@
-# whisper.nvim
-
-Speech-to-text in Neovim
-
-The transcription is performed on the CPU and no data leaves your computer. Works best on Apple Silicon devices.
-
-https://user-images.githubusercontent.com/1991296/198382564-784e9663-2037-4d04-99b8-f39136929b7e.mp4
-
-## Usage
-
-- Simply press `Ctrl-G` in `INSERT`, `VISUAL` or `NORMAL` mode and say something
-- When you are done - press `Ctrl-C` to end the transcription and insert the transcribed text under the cursor
-
-## Installation
-
-*Note: this is a bit tedious and hacky at the moment, but I hope it will be improved with time*
-
-- Clone this repo and build the `stream` tool:
-
-  ```
-  git clone https://github.com/ggerganov/whisper.cpp
-  cd whisper.cpp
-  make stream
-  ```
-
-- Download the `base.en` Whisper model (140 MB):
-
-  ```
-  ./models/download-ggml-model.sh base.en
-  ```
-
-- Place the [whisper.nvim](whisper.nvim) script somewhere in your PATH and give it execute permissions:
-
-  ```
-  cp examples/whisper.nvim/whisper.nvim ~/bin/
-  chmod u+x ~/bin/whisper.nvim
-  ```
-
-- Fine-tune the script to your preference and machine parameters:
-
-  ```
-  ./stream -t 8 -m models/ggml-base.en.bin --step 350 --length 10000 -f /tmp/whisper.nvim 2> /dev/null
-  ```
-
-  On slower machines, try to increase the `step` parameter.
-
-- Add the following shortcuts to your `~/.config/nvim/init.vim`:
-
-  ```
-  inoremap <C-G>  <C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
-  nnoremap <C-G>       :!whisper.nvim<CR>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR>"ap
-  vnoremap <C-G> c<C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
-  ```
-  
-  Explanation: pressing `Ctrl-G` runs the [whisper.nvim](whisper.nvim) script which in turn calls the `stream` binary to transcribe your speech through the microphone. The results from the transcription are continuously dumped into `/tmp/whisper.nvim`. After you kill the program with `Ctrl-C`, the vim command grabs the last line from the `/tmp/whisper.nvim` file and puts it under the cursor.
-  
-  Probably there is a much more intelligent way to achieve all this, but this is what I could hack in an hour. Any suggestions on how to improve this are welcome.
-  
-You are now ready to use speech-to-text in Neovim!
-
-## TODO
-
-There are a lot of ways to improve this idea and I don't have much experience with Vim plugin programming, so contributions are welcome! 
-
-- [ ] **Wrap this into a plugin**
-  
-  It would be great to make a standalone plugin out of this that can be installed with `vim-plug` or similar
-  
-- [ ] **Simplify the `init.vim` mappings (maybe factor out the common call into a separate function)**
-- [ ] **Add Copilot/GPT-3 integration**
-
-  This is probably a very long shot, but I think it will be very cool to have the functionality to select some code and then hit Ctrl-G and say something like:
-  
-  *"refactor this using stl containers"*
-  
-  or
-  
-  *"optimize by sorting the data first"*
-  
-  The plugin would then make an appropriate query using the selected text and code context to Copilot or GPT-3 and return the result.
-  
-  Here is a proof-of-concept:
-  
-  https://user-images.githubusercontent.com/1991296/199078847-0278fcde-5667-4748-ba0d-7d55381d6047.mp4
-    
-  https://user-images.githubusercontent.com/1991296/200067939-f98d2ac2-7519-438a-85f9-79db0841ba4f.mp4
-  
-  For an explanation of how this works, see: https://twitter.com/ggerganov/status/1587168771789258756
-
-## Discussion
-
-If you find this idea interesting, you can join the discussion here: https://github.com/ggerganov/whisper.cpp/discussions/108
--- a/examples/whisper.nvim/whisper.nvim
+++ b/examples/whisper.nvim/whisper.nvim
@ -1,50 +0,0 @@
-#!/bin/bash
-
-# INSTRUCTIONS
-#
-# This simple script is called by Neovim to capture audio from the microphone and transcribe it with Whisper.
-# In order for this to work, you need to clone the whisper.cpp repo and build the 'stream' tool
-#
-#   git clone https://github.com/ggerganov/whisper.cpp
-#   cd whisper.cpp
-#   make stream
-#
-# Also, make sure the current script is in your PATH env variable. You should be able to run the following command:
-#
-#   whisper.nvim
-#
-# Next, export the path to the whisper.cpp repository via the WHISPER_CPP_HOME env variable:
-#
-#   export WHISPER_CPP_HOME=/path/to/whisper.cpp
-#
-# Finally, add the following lines to your ~/.config/nvim/init.vim:
-#
-#   inoremap <C-G>  <C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
-#   nnoremap <C-G>       :!whisper.nvim<CR>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR>"ap
-#   vnoremap <C-G> c<C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
-#
-# This allows you to press Ctrl-G in order to capture audio from the microphone and transcribe it.
-# When you are done speaking - press Ctrl-C
-#
-
-# the Whisper model to use; both the existence check and the stream invocation below read it
-model="base.en"
-
-# export the path to the whisper.cpp repo in the WHISPER_CPP_HOME env variable
-# https://github.com/ggerganov/whisper.cpp
-# A failed cd is reported the same way as a missing 'stream' binary, so that a
-# './stream' from some unrelated working directory is never picked up.
-if ! cd "${WHISPER_CPP_HOME}" || [ ! -f ./stream ] ; then
-    echo "whisper.nvim: the 'stream' executable was not found! WHISPER_CPP_HOME=${WHISPER_CPP_HOME}" > /tmp/whisper.nvim
-    exit 1
-fi
-
-if [ ! -f "./models/ggml-${model}.bin" ] ; then
-    echo "whisper.nvim: the '$model' model was not found! WHISPER_CPP_HOME=${WHISPER_CPP_HOME}" > /tmp/whisper.nvim
-    exit 2
-fi
-
-# fine-tune the parameters according to your machine specs
-# (errors are written to /tmp/whisper.nvim because that is the file the Neovim mappings read)
-./stream -t 8 -m "models/ggml-${model}.bin" --step 350 --length 10000 -f /tmp/whisper.nvim 2> /dev/null
-
-exit 0
--- a/examples/whisper.objc/README.md
+++ b/examples/whisper.objc/README.md
@ -1,21 +0,0 @@
-# whisper.objc
-
-Minimal Obj-C application for automatic offline speech recognition.
-The inference runs locally, on-device.
-
-https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
-
-Real-time transcription demo:
-
-https://user-images.githubusercontent.com/1991296/204126266-ce4177c6-6eca-4bd9-bca8-0e46d9da2364.mp4
-
-## Usage
-
-```bash
-git clone https://github.com/ggerganov/whisper.cpp
-open whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/
-```
-
-Make sure to build the project in `Release`:
-
-<img width="947" alt="image" src="https://user-images.githubusercontent.com/1991296/197382607-9e1e6d1b-79fa-496f-9d16-b71dc1535701.png">
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
@ -1,384 +0,0 @@
-// !$*UTF8*$!
-{
-	archiveVersion = 1;
-	classes = {
-	};
-	objectVersion = 56;
-	objects = {
-
-/* Begin PBXBuildFile section */
-		18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
-		18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
-		18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
-		18627C8429052BDF00BD2A04 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8229052BDF00BD2A04 /* Main.storyboard */; };
-		18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
-		18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
-		18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
-		18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; };
-		18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE"; }; };
-		18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
-/* End PBXBuildFile section */
-
-/* Begin PBXFileReference section */
-		18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
-		18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
-		18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
-		18627C7C29052BDF00BD2A04 /* SceneDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SceneDelegate.h; sourceTree = "<group>"; };
-		18627C7D29052BDF00BD2A04 /* SceneDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SceneDelegate.m; sourceTree = "<group>"; };
-		18627C7F29052BDF00BD2A04 /* ViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ViewController.h; sourceTree = "<group>"; };
-		18627C8029052BDF00BD2A04 /* ViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = ViewController.m; sourceTree = "<group>"; };
-		18627C8329052BDF00BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
-		18627C8529052BE000BD2A04 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
-		18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
-		18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
-		18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
-		18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../whisper.h; sourceTree = "<group>"; };
-		18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../whisper.cpp; sourceTree = "<group>"; };
-		18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml.c; sourceTree = "<group>"; };
-		18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml.h; sourceTree = "<group>"; };
-		18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
-/* End PBXFileReference section */
-
-/* Begin PBXFrameworksBuildPhase section */
-		18627C7329052BDF00BD2A04 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXFrameworksBuildPhase section */
-
-/* Begin PBXGroup section */
-		18627C6D29052BDF00BD2A04 = {
-			isa = PBXGroup;
-			children = (
-				18627C7829052BDF00BD2A04 /* whisper.objc */,
-				18627C7729052BDF00BD2A04 /* Products */,
-			);
-			sourceTree = "<group>";
-		};
-		18627C7729052BDF00BD2A04 /* Products */ = {
-			isa = PBXGroup;
-			children = (
-				18627C7629052BDF00BD2A04 /* whisper.objc.app */,
-			);
-			name = Products;
-			sourceTree = "<group>";
-		};
-		18627C7829052BDF00BD2A04 /* whisper.objc */ = {
-			isa = PBXGroup;
-			children = (
-				18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */,
-				18627C9729052C6600BD2A04 /* ggml.h */,
-				18627C9529052C5800BD2A04 /* ggml.c */,
-				18627C9329052C4900BD2A04 /* whisper.cpp */,
-				18627C9229052C2B00BD2A04 /* whisper.h */,
-				18627C7929052BDF00BD2A04 /* AppDelegate.h */,
-				18627C7A29052BDF00BD2A04 /* AppDelegate.m */,
-				18627C7C29052BDF00BD2A04 /* SceneDelegate.h */,
-				18627C7D29052BDF00BD2A04 /* SceneDelegate.m */,
-				18627C7F29052BDF00BD2A04 /* ViewController.h */,
-				18627C8029052BDF00BD2A04 /* ViewController.m */,
-				18627C8229052BDF00BD2A04 /* Main.storyboard */,
-				18627C8529052BE000BD2A04 /* Assets.xcassets */,
-				18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */,
-				18627C8A29052BE000BD2A04 /* Info.plist */,
-				18627C8B29052BE000BD2A04 /* main.m */,
-			);
-			path = whisper.objc;
-			sourceTree = "<group>";
-		};
-/* End PBXGroup section */
-
-/* Begin PBXNativeTarget section */
-		18627C7529052BDF00BD2A04 /* whisper.objc */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = 18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */;
-			buildPhases = (
-				18627C7229052BDF00BD2A04 /* Sources */,
-				18627C7329052BDF00BD2A04 /* Frameworks */,
-				18627C7429052BDF00BD2A04 /* Resources */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = whisper.objc;
-			productName = whisper.objc;
-			productReference = 18627C7629052BDF00BD2A04 /* whisper.objc.app */;
-			productType = "com.apple.product-type.application";
-		};
-/* End PBXNativeTarget section */
-
-/* Begin PBXProject section */
-		18627C6E29052BDF00BD2A04 /* Project object */ = {
-			isa = PBXProject;
-			attributes = {
-				BuildIndependentTargetsInParallel = 1;
-				LastUpgradeCheck = 1400;
-				TargetAttributes = {
-					18627C7529052BDF00BD2A04 = {
-						CreatedOnToolsVersion = 14.0.1;
-					};
-				};
-			};
-			buildConfigurationList = 18627C7129052BDF00BD2A04 /* Build configuration list for PBXProject "whisper.objc" */;
-			compatibilityVersion = "Xcode 14.0";
-			developmentRegion = en;
-			hasScannedForEncodings = 0;
-			knownRegions = (
-				en,
-				Base,
-			);
-			mainGroup = 18627C6D29052BDF00BD2A04;
-			productRefGroup = 18627C7729052BDF00BD2A04 /* Products */;
-			projectDirPath = "";
-			projectRoot = "";
-			targets = (
-				18627C7529052BDF00BD2A04 /* whisper.objc */,
-			);
-		};
-/* End PBXProject section */
-
-/* Begin PBXResourcesBuildPhase section */
-		18627C7429052BDF00BD2A04 /* Resources */ = {
-			isa = PBXResourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
-				18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
-				18627C8429052BDF00BD2A04 /* Main.storyboard in Resources */,
-				18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXResourcesBuildPhase section */
-
-/* Begin PBXSourcesBuildPhase section */
-		18627C7229052BDF00BD2A04 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
-				18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
-				18627C9629052C5800BD2A04 /* ggml.c in Sources */,
-				18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
-				18627C8C29052BE000BD2A04 /* main.m in Sources */,
-				18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXSourcesBuildPhase section */
-
-/* Begin PBXVariantGroup section */
-		18627C8229052BDF00BD2A04 /* Main.storyboard */ = {
-			isa = PBXVariantGroup;
-			children = (
-				18627C8329052BDF00BD2A04 /* Base */,
-			);
-			name = Main.storyboard;
-			sourceTree = "<group>";
-		};
-		18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */ = {
-			isa = PBXVariantGroup;
-			children = (
-				18627C8829052BE000BD2A04 /* Base */,
-			);
-			name = LaunchScreen.storyboard;
-			sourceTree = "<group>";
-		};
-/* End PBXVariantGroup section */
-
-/* Begin XCBuildConfiguration section */
-		18627C8D29052BE000BD2A04 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ALWAYS_SEARCH_USER_PATHS = NO;
-				CLANG_ANALYZER_NONNULL = YES;
-				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
-				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
-				CLANG_ENABLE_MODULES = YES;
-				CLANG_ENABLE_OBJC_ARC = YES;
-				CLANG_ENABLE_OBJC_WEAK = YES;
-				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
-				CLANG_WARN_BOOL_CONVERSION = YES;
-				CLANG_WARN_COMMA = YES;
-				CLANG_WARN_CONSTANT_CONVERSION = YES;
-				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
-				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
-				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
-				CLANG_WARN_EMPTY_BODY = YES;
-				CLANG_WARN_ENUM_CONVERSION = YES;
-				CLANG_WARN_INFINITE_RECURSION = YES;
-				CLANG_WARN_INT_CONVERSION = YES;
-				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
-				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
-				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
-				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
-				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
-				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
-				CLANG_WARN_STRICT_PROTOTYPES = YES;
-				CLANG_WARN_SUSPICIOUS_MOVE = YES;
-				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
-				CLANG_WARN_UNREACHABLE_CODE = YES;
-				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				COPY_PHASE_STRIP = NO;
-				DEBUG_INFORMATION_FORMAT = dwarf;
-				ENABLE_STRICT_OBJC_MSGSEND = YES;
-				ENABLE_TESTABILITY = YES;
-				GCC_C_LANGUAGE_STANDARD = gnu11;
-				GCC_DYNAMIC_NO_PIC = NO;
-				GCC_NO_COMMON_BLOCKS = YES;
-				GCC_OPTIMIZATION_LEVEL = 0;
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					"DEBUG=1",
-					"$(inherited)",
-				);
-				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
-				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
-				GCC_WARN_UNDECLARED_SELECTOR = YES;
-				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
-				GCC_WARN_UNUSED_FUNCTION = YES;
-				GCC_WARN_UNUSED_VARIABLE = YES;
-				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
-				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
-				MTL_FAST_MATH = YES;
-				ONLY_ACTIVE_ARCH = YES;
-				SDKROOT = iphoneos;
-			};
-			name = Debug;
-		};
-		18627C8E29052BE000BD2A04 /* Release */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ALWAYS_SEARCH_USER_PATHS = NO;
-				CLANG_ANALYZER_NONNULL = YES;
-				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
-				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
-				CLANG_ENABLE_MODULES = YES;
-				CLANG_ENABLE_OBJC_ARC = YES;
-				CLANG_ENABLE_OBJC_WEAK = YES;
-				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
-				CLANG_WARN_BOOL_CONVERSION = YES;
-				CLANG_WARN_COMMA = YES;
-				CLANG_WARN_CONSTANT_CONVERSION = YES;
-				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
-				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
-				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
-				CLANG_WARN_EMPTY_BODY = YES;
-				CLANG_WARN_ENUM_CONVERSION = YES;
-				CLANG_WARN_INFINITE_RECURSION = YES;
-				CLANG_WARN_INT_CONVERSION = YES;
-				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
-				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
-				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
-				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
-				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
-				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
-				CLANG_WARN_STRICT_PROTOTYPES = YES;
-				CLANG_WARN_SUSPICIOUS_MOVE = YES;
-				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
-				CLANG_WARN_UNREACHABLE_CODE = YES;
-				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				COPY_PHASE_STRIP = NO;
-				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
-				ENABLE_NS_ASSERTIONS = NO;
-				ENABLE_STRICT_OBJC_MSGSEND = YES;
-				GCC_C_LANGUAGE_STANDARD = gnu11;
-				GCC_NO_COMMON_BLOCKS = YES;
-				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
-				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
-				GCC_WARN_UNDECLARED_SELECTOR = YES;
-				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
-				GCC_WARN_UNUSED_FUNCTION = YES;
-				GCC_WARN_UNUSED_VARIABLE = YES;
-				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
-				MTL_ENABLE_DEBUG_INFO = NO;
-				MTL_FAST_MATH = YES;
-				SDKROOT = iphoneos;
-				VALIDATE_PRODUCT = YES;
-			};
-			name = Release;
-		};
-		18627C9029052BE000BD2A04 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
-				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
-				CODE_SIGN_STYLE = Automatic;
-				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = P8JZH34X63;
-				GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
-				GENERATE_INFOPLIST_FILE = YES;
-				INFOPLIST_FILE = whisper.objc/Info.plist;
-				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
-				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
-				INFOPLIST_KEY_UIMainStoryboardFile = Main;
-				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-				LD_RUNPATH_SEARCH_PATHS = (
-					"$(inherited)",
-					"@executable_path/Frameworks",
-				);
-				MARKETING_VERSION = 1.0;
-				PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
-				PRODUCT_NAME = "$(TARGET_NAME)";
-				SWIFT_EMIT_LOC_STRINGS = YES;
-				TARGETED_DEVICE_FAMILY = "1,2";
-			};
-			name = Debug;
-		};
-		18627C9129052BE000BD2A04 /* Release */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
-				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
-				CODE_SIGN_STYLE = Automatic;
-				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = P8JZH34X63;
-				GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
-				GENERATE_INFOPLIST_FILE = YES;
-				INFOPLIST_FILE = whisper.objc/Info.plist;
-				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
-				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
-				INFOPLIST_KEY_UIMainStoryboardFile = Main;
-				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-				LD_RUNPATH_SEARCH_PATHS = (
-					"$(inherited)",
-					"@executable_path/Frameworks",
-				);
-				MARKETING_VERSION = 1.0;
-				PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
-				PRODUCT_NAME = "$(TARGET_NAME)";
-				SWIFT_EMIT_LOC_STRINGS = YES;
-				TARGETED_DEVICE_FAMILY = "1,2";
-			};
-			name = Release;
-		};
-/* End XCBuildConfiguration section */
-
-/* Begin XCConfigurationList section */
-		18627C7129052BDF00BD2A04 /* Build configuration list for PBXProject "whisper.objc" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				18627C8D29052BE000BD2A04 /* Debug */,
-				18627C8E29052BE000BD2A04 /* Release */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Release;
-		};
-		18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				18627C9029052BE000BD2A04 /* Debug */,
-				18627C9129052BE000BD2A04 /* Release */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Release;
-		};
-/* End XCConfigurationList section */
-	};
-	rootObject = 18627C6E29052BDF00BD2A04 /* Project object */;
-}
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<Workspace
-   version = "1.0">
-   <FileRef
-      location = "self:">
-   </FileRef>
-</Workspace>
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-	<key>IDEDidComputeMac32BitWarning</key>
-	<true/>
-</dict>
-</plist>
--- a/examples/whisper.objc/whisper.objc/AppDelegate.h
+++ b/examples/whisper.objc/whisper.objc/AppDelegate.h
@ -1,14 +0,0 @@
-//
-//  AppDelegate.h
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import <UIKit/UIKit.h>
-
-// Application delegate: a UIResponder subclass that adopts UIApplicationDelegate.
-// The public interface declares no properties or methods of its own.
-@interface AppDelegate : UIResponder <UIApplicationDelegate>
-
-
-@end
-
--- a/examples/whisper.objc/whisper.objc/AppDelegate.m
+++ b/examples/whisper.objc/whisper.objc/AppDelegate.m
@ -1,40 +0,0 @@
-//
-//  AppDelegate.m
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import "AppDelegate.h"
-
-// Empty class extension: AppDelegate keeps no private state.
-@interface AppDelegate ()
-
-@end
-
-@implementation AppDelegate
-
-
-// Launch hook; performs no setup and always reports success.
-- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
-    // Override point for customization after application launch.
-    return YES;
-}
-
-
-#pragma mark - UISceneSession lifecycle
-
-
-// Returns the scene configuration named "Default Configuration" for the role of the connecting session.
-- (UISceneConfiguration *)application:(UIApplication *)application configurationForConnectingSceneSession:(UISceneSession *)connectingSceneSession options:(UISceneConnectionOptions *)options {
-    // Called when a new scene session is being created.
-    // Use this method to select a configuration to create the new scene with.
-    return [[UISceneConfiguration alloc] initWithName:@"Default Configuration" sessionRole:connectingSceneSession.role];
-}
-
-
-// Intentionally empty: there are no per-scene resources to release.
-- (void)application:(UIApplication *)application didDiscardSceneSessions:(NSSet<UISceneSession *> *)sceneSessions {
-    // Called when the user discards a scene session.
-    // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions.
-    // Use this method to release any resources that were specific to the discarded scenes, as they will not return.
-}
-
-
-@end
--- a/examples/whisper.objc/whisper.objc/Assets.xcassets/AccentColor.colorset/Contents.json
+++ b/examples/whisper.objc/whisper.objc/Assets.xcassets/AccentColor.colorset/Contents.json
@ -1,11 +0,0 @@
-{
-  "colors" : [
-    {
-      "idiom" : "universal"
-    }
-  ],
-  "info" : {
-    "author" : "xcode",
-    "version" : 1
-  }
-}
--- a/examples/whisper.objc/whisper.objc/Assets.xcassets/AppIcon.appiconset/Contents.json
+++ b/examples/whisper.objc/whisper.objc/Assets.xcassets/AppIcon.appiconset/Contents.json
@ -1,13 +0,0 @@
-{
-  "images" : [
-    {
-      "idiom" : "universal",
-      "platform" : "ios",
-      "size" : "1024x1024"
-    }
-  ],
-  "info" : {
-    "author" : "xcode",
-    "version" : 1
-  }
-}
--- a/examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json
+++ b/examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json
@ -1,6 +0,0 @@
-{
-  "info" : {
-    "author" : "xcode",
-    "version" : 1
-  }
-}
--- a/examples/whisper.objc/whisper.objc/Base.lproj/LaunchScreen.storyboard
+++ b/examples/whisper.objc/whisper.objc/Base.lproj/LaunchScreen.storyboard
@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
-    <dependencies>
-        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
-        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
-        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
-    </dependencies>
-    <scenes>
-        <!--View Controller-->
-        <scene sceneID="EHf-IW-A2E">
-            <objects>
-                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
-                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
-                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
-                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
-                        <color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
-                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
-                    </view>
-                </viewController>
-                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
-            </objects>
-            <point key="canvasLocation" x="53" y="375"/>
-        </scene>
-    </scenes>
-</document>
--- a/examples/whisper.objc/whisper.objc/Base.lproj/Main.storyboard
+++ b/examples/whisper.objc/whisper.objc/Base.lproj/Main.storyboard
@ -1,102 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="21507" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
-    <device id="retina6_0" orientation="portrait" appearance="light"/>
-    <dependencies>
-        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="21505"/>
-        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
-        <capability name="System colors in document resources" minToolsVersion="11.0"/>
-        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
-    </dependencies>
-    <scenes>
-        <!--View Controller-->
-        <scene sceneID="tne-QT-ifu">
-            <objects>
-                <viewController id="BYZ-38-t0r" customClass="ViewController" sceneMemberID="viewController">
-                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
-                        <rect key="frame" x="0.0" y="0.0" width="390" height="844"/>
-                        <autoresizingMask key="autoresizingMask" flexibleMinX="YES" widthSizable="YES" flexibleMinY="YES" heightSizable="YES"/>
-                        <subviews>
-                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="VOi-PT-Rbu">
-                                <rect key="frame" x="35" y="121" width="156" height="49"/>
-                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
-                                <color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
-                                <color key="tintColor" systemColor="opaqueSeparatorColor"/>
-                                <state key="normal" title="Start Capturing">
-                                    <color key="titleColor" systemColor="labelColor"/>
-                                </state>
-                                <connections>
-                                    <action selector="toggleCapture:" destination="BYZ-38-t0r" eventType="touchUpInside" id="BuO-Wf-RgV"/>
-                                </connections>
-                            </button>
-                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" text="Status: Idle" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="Tgu-2q-eHQ">
-                                <rect key="frame" x="35" y="78" width="232" height="21"/>
-                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
-                                <fontDescription key="fontDescription" type="system" pointSize="17"/>
-                                <nil key="textColor"/>
-                                <nil key="highlightedColor"/>
-                            </label>
-                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" fixedFrame="YES" text="Record some speech and press &quot;Transcribe&quot;. The result will be displayed here." textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="mv2-KD-7jn">
-                                <rect key="frame" x="35" y="248" width="320" height="300"/>
-                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
-                                <color key="backgroundColor" systemColor="systemBackgroundColor"/>
-                                <color key="textColor" systemColor="labelColor"/>
-                                <fontDescription key="fontDescription" name="Georgia" family="Georgia" pointSize="16"/>
-                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
-                            </textView>
-                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="Brs-xi-o8i">
-                                <rect key="frame" x="35" y="191" width="156" height="49"/>
-                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
-                                <color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
-                                <color key="tintColor" systemColor="opaqueSeparatorColor"/>
-                                <state key="normal" title="Transcribe">
-                                    <color key="titleColor" systemColor="labelColor"/>
-                                </state>
-                                <connections>
-                                    <action selector="onTranscribe:" destination="BYZ-38-t0r" eventType="touchUpInside" id="ond-bx-48O"/>
-                                    <action selector="onTranscribePrepare:" destination="BYZ-38-t0r" eventType="touchDown" id="16T-dN-dfB"/>
-                                </connections>
-                            </button>
-                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="AaW-T2-Ndw">
-                                <rect key="frame" x="199" y="191" width="156" height="49"/>
-                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
-                                <color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
-                                <color key="tintColor" systemColor="opaqueSeparatorColor"/>
-                                <state key="normal" title="Real-time">
-                                    <color key="titleColor" systemColor="labelColor"/>
-                                </state>
-                                <connections>
-                                    <action selector="onRealtime:" destination="BYZ-38-t0r" eventType="touchUpInside" id="nhn-jT-aQJ"/>
-                                </connections>
-                            </button>
-                        </subviews>
-                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
-                        <color key="backgroundColor" systemColor="systemBackgroundColor"/>
-                        <constraints>
-                            <constraint firstItem="Brs-xi-o8i" firstAttribute="trailing" secondItem="VOi-PT-Rbu" secondAttribute="trailing" id="8mF-AW-cbc"/>
-                        </constraints>
-                    </view>
-                    <connections>
-                        <outlet property="buttonRealtime" destination="AaW-T2-Ndw" id="gcU-Ol-BOo"/>
-                        <outlet property="buttonToggleCapture" destination="VOi-PT-Rbu" id="nis-VC-DQO"/>
-                        <outlet property="buttonTranscribe" destination="Brs-xi-o8i" id="N8h-9W-ywb"/>
-                        <outlet property="labelStatusInp" destination="Tgu-2q-eHQ" id="1hH-Ql-K6j"/>
-                        <outlet property="textviewResult" destination="mv2-KD-7jn" id="RBw-0L-iGj"/>
-                    </connections>
-                </viewController>
-                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
-            </objects>
-            <point key="canvasLocation" x="30.769230769230766" y="-28.436018957345969"/>
-        </scene>
-    </scenes>
-    <resources>
-        <systemColor name="labelColor">
-            <color red="0.0" green="0.0" blue="0.0" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
-        </systemColor>
-        <systemColor name="opaqueSeparatorColor">
-            <color red="0.77647058823529413" green="0.77647058823529413" blue="0.78431372549019607" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
-        </systemColor>
-        <systemColor name="systemBackgroundColor">
-            <color white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
-        </systemColor>
-    </resources>
-</document>
--- a/examples/whisper.objc/whisper.objc/Info.plist
+++ b/examples/whisper.objc/whisper.objc/Info.plist
@ -1,27 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-	<key>NSMicrophoneUsageDescription</key>
-	<string>This app requires microphone access in order to transcribe speech</string>
-	<key>UIApplicationSceneManifest</key>
-	<dict>
-		<key>UIApplicationSupportsMultipleScenes</key>
-		<false/>
-		<key>UISceneConfigurations</key>
-		<dict>
-			<key>UIWindowSceneSessionRoleApplication</key>
-			<array>
-				<dict>
-					<key>UISceneConfigurationName</key>
-					<string>Default Configuration</string>
-					<key>UISceneDelegateClassName</key>
-					<string>SceneDelegate</string>
-					<key>UISceneStoryboardFile</key>
-					<string>Main</string>
-				</dict>
-			</array>
-		</dict>
-	</dict>
-</dict>
-</plist>
--- a/examples/whisper.objc/whisper.objc/SceneDelegate.h
+++ b/examples/whisper.objc/whisper.objc/SceneDelegate.h
@ -1,15 +0,0 @@
-//
-//  SceneDelegate.h
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import <UIKit/UIKit.h>
-
-// Scene delegate of the app: a UIResponder subclass that adopts the
-// UIWindowSceneDelegate protocol.
-@interface SceneDelegate : UIResponder <UIWindowSceneDelegate>
-
-// Window associated with the scene; held strongly by the delegate.
-@property (strong, nonatomic) UIWindow * window;
-
-@end
-
--- a/examples/whisper.objc/whisper.objc/SceneDelegate.m
+++ b/examples/whisper.objc/whisper.objc/SceneDelegate.m
@ -1,57 +0,0 @@
-//
-//  SceneDelegate.m
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import "SceneDelegate.h"
-
-@interface SceneDelegate ()
-
-@end
-
-// Scene life-cycle callbacks. Every method below is intentionally empty: the
-// bodies only carry the explanatory comments describing when each callback is
-// invoked, so the delegate currently performs no work of its own.
-@implementation SceneDelegate
-
-
- (void)scene:(UIScene *)scene willConnectToSession:(UISceneSession *)session options:(UISceneConnectionOptions *)connectionOptions {
-    // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
-    // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
-    // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
-}
-
-
- (void)sceneDidDisconnect:(UIScene *)scene {
-    // Called as the scene is being released by the system.
-    // This occurs shortly after the scene enters the background, or when its session is discarded.
-    // Release any resources associated with this scene that can be re-created the next time the scene connects.
-    // The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
-}
-
-
- (void)sceneDidBecomeActive:(UIScene *)scene {
-    // Called when the scene has moved from an inactive state to an active state.
-    // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
-}
-
-
- (void)sceneWillResignActive:(UIScene *)scene {
-    // Called when the scene will move from an active state to an inactive state.
-    // This may occur due to temporary interruptions (ex. an incoming phone call).
-}
-
-
- (void)sceneWillEnterForeground:(UIScene *)scene {
-    // Called as the scene transitions from the background to the foreground.
-    // Use this method to undo the changes made on entering the background.
-}
-
-
- (void)sceneDidEnterBackground:(UIScene *)scene {
-    // Called as the scene transitions from the foreground to the background.
-    // Use this method to save data, release shared resources, and store enough scene-specific state information
-    // to restore the scene back to its current state.
-}
-
-
-@end
--- a/examples/whisper.objc/whisper.objc/ViewController.h
+++ b/examples/whisper.objc/whisper.objc/ViewController.h
@ -1,45 +0,0 @@
-//
-//  ViewController.h
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import <UIKit/UIKit.h>
-
-#import <AVFoundation/AVFoundation.h>
-#import <AudioToolbox/AudioQueue.h>
-
-#define NUM_BUFFERS 3
-#define MAX_AUDIO_SEC 30
-#define SAMPLE_RATE 16000
-
-struct whisper_context;
-
-// State shared between the view controller and the audio input callback.
-// The view controller passes a pointer to its instance of this struct as the
-// user data of the input audio queue.
-typedef struct
-{
-    // NOTE(review): not referenced by ViewController.m - possibly a leftover; confirm before removing
-    int ggwaveId;
-    // true while the input audio queue is recording
-    bool isCapturing;
-    // true while a transcription is running; used to reject overlapping requests
-    bool isTranscribing;
-    // when set, every captured buffer triggers a transcription
-    bool isRealtime;
-    // NOTE(review): not referenced by ViewController.m - possibly a leftover; confirm before removing
-    UILabel * labelReceived;
-
-    // input audio queue, its stream format and the buffers enqueued on it
-    AudioQueueRef queue;
-    AudioStreamBasicDescription dataFormat;
-    AudioQueueBufferRef buffers[NUM_BUFFERS];
-
-    // number of samples captured so far
-    int n_samples;
-    // captured samples as 16-bit integers (allocated for MAX_AUDIO_SEC*SAMPLE_RATE samples)
-    int16_t * audioBufferI16;
-    // the same samples converted to float before being passed to the model
-    float   * audioBufferF32;
-
-    // whisper context created from the bundled model; NULL when loading failed
-    struct whisper_context * ctx;
-
-    // unretained (bridged) pointer back to the owning ViewController
-    void * vc;
-} StateInp;
-
-// Main view controller of the example; keeps the capture/transcription state
-// in the `stateInp` instance variable.
-@interface ViewController : UIViewController
-{
-    StateInp stateInp;
-}
-
-@end
--- a/examples/whisper.objc/whisper.objc/ViewController.m
+++ b/examples/whisper.objc/whisper.objc/ViewController.m
@ -1,297 +0,0 @@
-//
-//  ViewController.m
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import "ViewController.h"
-
-#import "whisper.h"
-
-#define NUM_BYTES_PER_BUFFER 16*1024
-
-// callback used to process captured audio
-void AudioInputCallback(void * inUserData,
-                        AudioQueueRef inAQ,
-                        AudioQueueBufferRef inBuffer,
-                        const AudioTimeStamp * inStartTime,
-                        UInt32 inNumberPacketDescriptions,
-                        const AudioStreamPacketDescription * inPacketDescs);
-
-@interface ViewController ()
-
-@property (weak, nonatomic) IBOutlet UILabel    *labelStatusInp;
-@property (weak, nonatomic) IBOutlet UIButton   *buttonToggleCapture;
-@property (weak, nonatomic) IBOutlet UIButton   *buttonTranscribe;
-@property (weak, nonatomic) IBOutlet UIButton   *buttonRealtime;
-@property (weak, nonatomic) IBOutlet UITextView *textviewResult;
-
-@end
-
-@implementation ViewController
-
- (void)setupAudioFormat:(AudioStreamBasicDescription*)format
-{
-    // sample encoding: 16-bit signed integer linear PCM
-    format->mFormatID       = kAudioFormatLinearPCM;
-    format->mFormatFlags    = kLinearPCMFormatFlagIsSignedInteger;
-    format->mBitsPerChannel = 16;
-
-    // layout: a single channel, one 2-byte frame per packet
-    format->mChannelsPerFrame = 1;
-    format->mFramesPerPacket  = 1;
-    format->mBytesPerFrame    = 2;
-    format->mBytesPerPacket   = 2;
-
-    // capture at the sample rate used by whisper
-    format->mSampleRate = WHISPER_SAMPLE_RATE;
-    format->mReserved   = 0;
-}
-
- (void)viewDidLoad {
-    [super viewDidLoad];
-
-    // Set up the capture state before touching the model: the model loading
-    // below returns early on failure, and the audio callback must never see
-    // unallocated buffers or unset flags in that case.
-    stateInp.isCapturing    = false;
-    stateInp.isTranscribing = false;
-    stateInp.isRealtime     = false;
-    stateInp.ctx            = NULL;
-
-    // initialize audio format and buffers
-    {
-        [self setupAudioFormat:&stateInp.dataFormat];
-
-        stateInp.n_samples = 0;
-        stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
-        stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));
-    }
-
-    // whisper.cpp initialization
-    {
-        // locate the model inside the application bundle
-        NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"ggml-base.en" ofType:@"bin"];
-
-        // check if the model exists
-        if (![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
-            NSLog(@"Model file not found");
-            return;
-        }
-
-        NSLog(@"Loading model from %@", modelPath);
-
-        // create ggml context
-        stateInp.ctx = whisper_init([modelPath UTF8String]);
-
-        // check if the model was loaded successfully
-        if (stateInp.ctx == NULL) {
-            NSLog(@"Failed to load model");
-            return;
-        }
-    }
-}
-
-(IBAction) stopCapturing {
-    NSLog(@"Stop capturing");
-
-    // reset the UI to the idle state
-    _labelStatusInp.text = @"Status: Idle";
-
-    [_buttonToggleCapture setTitle:@"Start capturing" forState:UIControlStateNormal];
-    [_buttonToggleCapture setBackgroundColor:[UIColor grayColor]];
-
-    stateInp.isCapturing = false;
-
-    // This method can be reached more than once for the same queue (several
-    // audio callbacks may each schedule a stop) or without a queue at all
-    // (queue creation failed), so only tear down a queue that still exists.
-    if (stateInp.queue == NULL) {
-        return;
-    }
-
-    AudioQueueStop(stateInp.queue, true);
-    for (int i = 0; i < NUM_BUFFERS; i++) {
-        if (stateInp.buffers[i] != NULL) {
-            AudioQueueFreeBuffer(stateInp.queue, stateInp.buffers[i]);
-            stateInp.buffers[i] = NULL;
-        }
-    }
-
-    AudioQueueDispose(stateInp.queue, true);
-
-    // forget the disposed queue so that a repeated call becomes a no-op
-    stateInp.queue = NULL;
-}
-
- (IBAction)toggleCapture:(id)sender {
-    // a tap while recording stops the capture
-    if (stateInp.isCapturing) {
-        [self stopCapturing];
-        return;
-    }
-
-    NSLog(@"Start capturing");
-
-    // start from an empty recording and let the callback find its way back to us
-    stateInp.n_samples = 0;
-    stateInp.vc = (__bridge void *)(self);
-
-    OSStatus status = AudioQueueNewInput(&stateInp.dataFormat,
-                                         AudioInputCallback,
-                                         &stateInp,
-                                         CFRunLoopGetCurrent(),
-                                         kCFRunLoopCommonModes,
-                                         0,
-                                         &stateInp.queue);
-
-    // the input queue could not be created
-    if (status != 0) {
-        [self stopCapturing];
-        return;
-    }
-
-    // hand all buffers to the queue before starting it
-    for (int i = 0; i < NUM_BUFFERS; i++) {
-        AudioQueueAllocateBuffer(stateInp.queue, NUM_BYTES_PER_BUFFER, &stateInp.buffers[i]);
-        AudioQueueEnqueueBuffer (stateInp.queue, stateInp.buffers[i], 0, NULL);
-    }
-
-    stateInp.isCapturing = true;
-    status = AudioQueueStart(stateInp.queue, NULL);
-
-    // the queue refused to start
-    if (status != 0) {
-        [self stopCapturing];
-        return;
-    }
-
-    // recording is running - reflect it in the UI
-    _labelStatusInp.text = @"Status: Capturing";
-    [sender setTitle:@"Stop Capturing" forState:UIControlStateNormal];
-    [_buttonToggleCapture setBackgroundColor:[UIColor redColor]];
-}
-
- (IBAction)onTranscribePrepare:(id)sender {
-    // tell the user that work is about to start
-    _textviewResult.text = @"Processing - please wait ...";
-
-    // real-time mode is a toggle, so switching it off means toggling it once
-    const bool realtimeActive = stateInp.isRealtime;
-    if (realtimeActive) {
-        [self onRealtime:(id)sender];
-    }
-
-    // a running capture is finished before the transcription starts
-    const bool captureActive = stateInp.isCapturing;
-    if (captureActive) {
-        [self stopCapturing];
-    }
-}
-
- (IBAction)onRealtime:(id)sender {
-    // flip the real-time flag
-    const bool enabled = !stateInp.isRealtime;
-    stateInp.isRealtime = enabled;
-
-    // green while real-time mode is on, gray otherwise
-    UIColor * color = enabled ? [UIColor greenColor] : [UIColor grayColor];
-    [_buttonRealtime setBackgroundColor:color];
-
-    NSLog(@"Realtime: %@", enabled ? @"ON" : @"OFF");
-}
-
- (IBAction)onTranscribe:(id)sender {
-    // only one transcription may run at a time
-    if (stateInp.isTranscribing) {
-        return;
-    }
-
-    // viewDidLoad returns early when the model cannot be loaded, in which case
-    // there is no context to run the inference with
-    if (stateInp.ctx == NULL) {
-        NSLog(@"Failed to run the model");
-        _textviewResult.text = @"Failed to run the model";
-        return;
-    }
-
-    // Snapshot the sample count: the capture callback may keep appending
-    // samples while the background work below is running.
-    const int n_samples = stateInp.n_samples;
-
-    NSLog(@"Processing %d samples", n_samples);
-
-    stateInp.isTranscribing = true;
-
-    // dispatch the model to a background thread
-    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
-        // process captured audio
-        // convert I16 to F32
-        for (int i = 0; i < n_samples; i++) {
-            self->stateInp.audioBufferF32[i] = (float)self->stateInp.audioBufferI16[i] / 32768.0f;
-        }
-
-        // run the model
-        struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-        // get maximum number of threads on this device (max 8)
-        const int max_threads = MIN(8, (int)[[NSProcessInfo processInfo] processorCount]);
-
-        params.print_realtime   = true;
-        params.print_progress   = false;
-        params.print_timestamps = true;
-        params.print_special    = false;
-        params.translate        = false;
-        params.language         = "en";
-        params.n_threads        = max_threads;
-        params.offset_ms        = 0;
-        params.no_context       = true;
-        params.single_segment   = self->stateInp.isRealtime;
-
-        CFTimeInterval startTime = CACurrentMediaTime();
-
-        whisper_reset_timings(self->stateInp.ctx);
-
-        if (whisper_full(self->stateInp.ctx, params, self->stateInp.audioBufferF32, n_samples) != 0) {
-            NSLog(@"Failed to run the model");
-
-            // UI updates belong on the main thread; the busy flag is cleared
-            // there as well so that later requests are not rejected forever
-            dispatch_async(dispatch_get_main_queue(), ^{
-                self->_textviewResult.text = @"Failed to run the model";
-                self->stateInp.isTranscribing = false;
-            });
-
-            return;
-        }
-
-        whisper_print_timings(self->stateInp.ctx);
-
-        CFTimeInterval endTime = CACurrentMediaTime();
-
-        NSLog(@"\nProcessing time: %5.3f, on %d threads", endTime - startTime, params.n_threads);
-
-        // result text
-        NSString *result = @"";
-
-        int n_segments = whisper_full_n_segments(self->stateInp.ctx);
-        for (int i = 0; i < n_segments; i++) {
-            const char * text_cur = whisper_full_get_segment_text(self->stateInp.ctx, i);
-
-            // stringWithUTF8String returns nil for NULL or malformed UTF-8 input,
-            // and appending nil raises an exception - skip such segments
-            NSString *segment = (text_cur != NULL) ? [NSString stringWithUTF8String:text_cur] : nil;
-            if (segment == nil) {
-                continue;
-            }
-
-            // append the text to the result
-            result = [result stringByAppendingString:segment];
-        }
-
-        const float tRecording = (float)n_samples / (float)self->stateInp.dataFormat.mSampleRate;
-
-        // append recording and processing time
-        result = [result stringByAppendingString:[NSString stringWithFormat:@"\n\n[recording time:  %5.3f s]", tRecording]];
-        result = [result stringByAppendingString:[NSString stringWithFormat:@"  \n[processing time: %5.3f s]", endTime - startTime]];
-
-        // dispatch the result to the main thread
-        dispatch_async(dispatch_get_main_queue(), ^{
-            self->_textviewResult.text = result;
-            self->stateInp.isTranscribing = false;
-        });
-    });
-}
-
-//
-// Callback implementation
-//
-
-void AudioInputCallback(void * inUserData,
-                        AudioQueueRef inAQ,
-                        AudioQueueBufferRef inBuffer,
-                        const AudioTimeStamp * inStartTime,
-                        UInt32 inNumberPacketDescriptions,
-                        const AudioStreamPacketDescription * inPacketDescs)
-{
-    // the user data is the capture state registered together with the queue
-    StateInp * state = (StateInp*)inUserData;
-
-    if (state->isCapturing == false) {
-        NSLog(@"Not capturing, ignoring audio");
-        return;
-    }
-
-    // every sample occupies two bytes
-    const int nNew = inBuffer->mAudioDataByteSize / 2;
-
-    NSLog(@"Captured %d new samples", nNew);
-
-    // the recording buffer is full: drop this chunk and stop capturing
-    const int capacity = MAX_AUDIO_SEC*SAMPLE_RATE;
-    if (state->n_samples + nNew > capacity) {
-        NSLog(@"Too much audio data, ignoring");
-
-        dispatch_async(dispatch_get_main_queue(), ^{
-            [(__bridge ViewController *)(state->vc) stopCapturing];
-        });
-
-        return;
-    }
-
-    // append the new samples behind the ones captured so far
-    const short * src = (const short *)inBuffer->mAudioData;
-    int16_t     * dst = state->audioBufferI16 + state->n_samples;
-    for (int i = 0; i < nNew; i++) {
-        dst[i] = src[i];
-    }
-
-    state->n_samples += nNew;
-
-    // give the buffer back to the queue so it can be filled again
-    AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
-
-    // in real-time mode each captured chunk triggers a transcription on the main thread
-    if (state->isRealtime) {
-        dispatch_async(dispatch_get_main_queue(), ^{
-            [(__bridge ViewController *)(state->vc) onTranscribe:nil];
-        });
-    }
-}
-
-@end
--- a/examples/whisper.objc/whisper.objc/main.m
+++ b/examples/whisper.objc/whisper.objc/main.m
@ -1,18 +0,0 @@
-//
-//  main.m
-//  whisper.objc
-//
-//  Created by Georgi Gerganov on 23.10.22.
-//
-
-#import <UIKit/UIKit.h>
-#import "AppDelegate.h"
-
-int main(int argc, char * argv[]) {
-    // name of the application delegate class, looked up inside its own autorelease pool
-    NSString * delegateClassName = nil;
-    @autoreleasepool {
-        delegateClassName = NSStringFromClass([AppDelegate class]);
-    }
-
-    // hand control over to UIKit
-    return UIApplicationMain(argc, argv, nil, delegateClassName);
-}
--- a/examples/whisper.wasm/CMakeLists.txt
+++ b/examples/whisper.wasm/CMakeLists.txt
@ -1,47 +0,0 @@
-#
-# libmain
-#
-# Executable built from emscripten.cpp and linked against the `whisper` target.
-#
-
-set(TARGET libmain)
-
-add_executable(${TARGET}
-    emscripten.cpp
-    )
-
-target_link_libraries(${TARGET} PRIVATE
-    whisper
-    )
-
-# extra linker flags; stays undefined (expands to nothing) unless single-file mode is on
-unset(EXTRA_FLAGS)
-
-if (WHISPER_WASM_SINGLE_FILE)
-    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
-    message(STATUS "Embedding WASM inside main.js")
-
-    # publish the generated script next to the page as main.js;
-    # VERBATIM keeps the argument escaping identical on every platform
-    add_custom_command(
-        TARGET ${TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy
-        ${CMAKE_BINARY_DIR}/bin/${TARGET}.js
-        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/whisper.wasm/main.js
-        VERBATIM
-        )
-endif()
-
-# NOTE(review): TOTAL_MEMORY looks like the legacy spelling of INITIAL_MEMORY -
-# confirm against the Emscripten settings reference before dropping either one
-set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
-    --bind \
-    -s USE_PTHREADS=1 \
-    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1024MB \
-    -s TOTAL_MEMORY=1024MB \
-    -s FORCE_FILESYSTEM=1 \
-    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
-    ${EXTRA_FLAGS} \
-    ")
-
-#
-# whisper.wasm
-#
-# Static page assets, configured into the runtime output directory.
-#
-
-set(TARGET whisper.wasm)
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
--- a/examples/whisper.wasm/README.md
+++ b/examples/whisper.wasm/README.md
@ -1,42 +0,0 @@
-# whisper.wasm
-
-Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser
-
-This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
-implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer -
-it is processed locally on your machine. The performance is not great but you should be able to achieve x2 or x3
-real-time for the `tiny` and `base` models on a modern CPU and browser (i.e. transcribe 60 seconds of audio in about
-20-30 seconds).
-
-This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/) so you have to make
-sure that [your browser supports them](https://webassembly.org/roadmap/).
-
-The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and
-performance are unsatisfactory. The implementation currently supports only the `Greedy` sampling strategy. Both
-transcription and translation are supported.
-
-Since the model data is quite big (74MB for the `tiny` model) you need to manually load the model into the web-page.
-
-The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the
-audio is limited to 120 seconds.
-
-## Live demo
-
-Link: https://whisper.ggerganov.com
-
-![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png)
-
-## Build instructions
-
-```bash (v3.1.2)
-# build using Emscripten
-git clone https://github.com/ggerganov/whisper.cpp
-cd whisper.cpp
-mkdir build-em && cd build-em
-emcmake cmake ..
-make -j
-
-# copy the produced page to your HTTP path
-cp bin/whisper.wasm/*       /path/to/html/
-cp bin/libmain.worker.js    /path/to/html/
-```
--- a/examples/whisper.wasm/emscripten.cpp
+++ b/examples/whisper.wasm/emscripten.cpp
@ -1,108 +0,0 @@
-#include "whisper.h"
-
-#include <emscripten.h>
-#include <emscripten/bind.h>
-
-#include <vector>
-#include <thread>
-
-std::thread g_worker;
-
-std::vector<struct whisper_context *> g_contexts(4, nullptr);
-
-// JavaScript bindings. Contexts are addressed from JavaScript by a 1-based
-// index into g_contexts; 0 is used to signal failure. Every entry point first
-// waits for a still-running worker thread before touching the contexts.
-EMSCRIPTEN_BINDINGS(whisper) {
-    // init(path_model): load a model into the first free slot.
-    // Returns the 1-based slot index, or 0 when loading failed or no slot is free.
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        for (size_t i = 0; i < g_contexts.size(); ++i) {
-            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init(path_model.c_str());
-                if (g_contexts[i] != nullptr) {
-                    return i + 1;
-                } else {
-                    return (size_t) 0;
-                }
-            }
-        }
-
-        return (size_t) 0;
-    }));
-
-    // free(index): release the context stored in the given 1-based slot.
-    emscripten::function("free", emscripten::optional_override([](size_t index) {
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        // index 0 wraps around to the largest size_t and is rejected by the range check
-        --index;
-
-        if (index < g_contexts.size() && g_contexts[index] != nullptr) {
-            whisper_free(g_contexts[index]);
-            g_contexts[index] = nullptr;
-        }
-    }));
-
-    // full_default(index, audio, lang, translate): start processing `audio` on a worker thread.
-    // Returns 0 when the worker was started, -1 for an out-of-range index and -2 for an empty slot.
-    emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        --index;
-
-        if (index >= g_contexts.size()) {
-            return -1;
-        }
-
-        if (g_contexts[index] == nullptr) {
-            return -2;
-        }
-
-        struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
-
-        params.print_realtime   = true;
-        params.print_progress   = false;
-        params.print_timestamps = true;
-        params.print_special    = false;
-        params.translate        = translate;
-        params.language         = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
-        params.n_threads        = std::min(8, (int) std::thread::hardware_concurrency());
-        params.offset_ms        = 0;
-
-        std::vector<float> pcmf32;
-        const int n = audio["length"].as<int>();
-
-        emscripten::val heap = emscripten::val::module_property("HEAPU8");
-        emscripten::val memory = heap["buffer"];
-
-        pcmf32.resize(n);
-
-        // copy the JavaScript array into pcmf32 through a typed-array view placed over its storage
-        emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
-        memoryView.call<void>("set", audio);
-
-        // print system information
-        {
-            printf("system_info: n_threads = %d / %d | %s\n",
-                    params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
-
-            printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
-                    __func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
-                    params.n_threads, 1,
-                    params.language,
-                    params.translate ? "translate" : "transcribe");
-
-            printf("\n");
-        }
-
-        // run the worker
-        {
-            // params.language may point into `lang`, which is destroyed as soon as this call
-            // returns while the worker is still running. The worker therefore owns a copy of
-            // the string and re-points its params at that copy before using them.
-            g_worker = std::thread([index, params, language = std::string(params.language), pcmf32 = std::move(pcmf32)]() mutable {
-                params.language = language.c_str();
-
-                whisper_reset_timings(g_contexts[index]);
-                whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
-                whisper_print_timings(g_contexts[index]);
-            });
-        }
-
-        return 0;
-    }));
-}
--- a/examples/whisper.wasm/index-tmpl.html
+++ b/examples/whisper.wasm/index-tmpl.html
@ -1,555 +0,0 @@
-<!doctype html>
-<html lang="en-us">
-    <head>
-        <title>whisper.cpp : WASM example</title>
-
-        <style>
-            #output {
-                width: 100%;
-                height: 100%;
-                margin: 0 auto;
-                margin-top: 10px;
-                border-left: 0px;
-                border-right: 0px;
-                padding-left: 0px;
-                padding-right: 0px;
-                display: block;
-                background-color: black;
-                color: white;
-                font-size: 10px;
-                font-family: 'Lucida Console', Monaco, monospace;
-                outline: none;
-                white-space: pre;
-                overflow-wrap: normal;
-                overflow-x: scroll;
-            }
-        </style>
-    </head>
-    <body>
-        <div id="main-container">
-            <b>Minimal <a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running fully in the browser</b>
-
-            <br><br>
-
-            Usage instructions:<br>
-            <ul>
-                <li>Load a ggml model file (you can obtain one from <a href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
-                <li>Select audio file to transcribe or record audio from the microphone (sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
-                <li>Click on the "Transcribe" button to start the transcription</li>
-            </ul>
-
-            Note that the computation is quite heavy and may take a few seconds to complete.<br>
-            The transcription results will be displayed in the text area below.<br><br>
-            <b>Important: your browser must support WASM SIMD instructions for this to work.</b>
-
-            <br><br><hr>
-
-            <div id="model">
-                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-tiny"    onclick="loadWhisper('tiny')">tiny (75 MB)</button>
-                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <button id="fetch-whisper-base"    onclick="loadWhisper('base')">base (142 MB)</button>
-                <span id="fetch-whisper-progress"></span>
-
-                <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-            </div>
-
-            <br>
-
-            <!-- radio button to select between file upload or microphone -->
-            <div id="input">
-                Input:
-                <input type="radio" id="file" name="input" value="file" checked="checked" onchange="changeInput('file')" /> File
-                <input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')" /> Microphone
-            </div>
-
-            <br>
-
-            <div id="input_file">
-                Audio file:
-                <input type="file" id="file" name="file" onchange="loadAudio(event)" />
-            </div>
-
-            <div id="input_mic" style="display: none;">
-                Microphone:
-                <button id="start" onclick="startRecording()">Start</button>
-                <button id="stop" onclick="stopRecording()" disabled>Stop</button>
-
-                <!-- progress bar to show recording progress -->
-                <br><br>
-                <div id="progress" style="display: none;">
-                    <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
-                    <div id="progress-text">0%</div>
-                </div>
-            </div>
-
-            <audio controls="controls" id="audio" loop hidden>
-                Your browser does not support the &lt;audio&gt; tag.
-                <source id="source" src="" type="audio/wav" />
-            </audio>
-
-            <hr><br>
-
-            <table>
-                <tr>
-                    <td>
-                        Language:
-                        <select id="language" name="language">
-                            <option value="en">English</option>
-                            <option value="ar">Arabic</option>
-                            <option value="hy">Armenian</option>
-                            <option value="az">Azerbaijani</option>
-                            <option value="eu">Basque</option>
-                            <option value="be">Belarusian</option>
-                            <option value="bn">Bengali</option>
-                            <option value="bg">Bulgarian</option>
-                            <option value="ca">Catalan</option>
-                            <option value="zh">Chinese</option>
-                            <option value="hr">Croatian</option>
-                            <option value="cs">Czech</option>
-                            <option value="da">Danish</option>
-                            <option value="nl">Dutch</option>
-                            <option value="en">English</option>
-                            <option value="et">Estonian</option>
-                            <option value="tl">Filipino</option>
-                            <option value="fi">Finnish</option>
-                            <option value="fr">French</option>
-                            <option value="gl">Galician</option>
-                            <option value="ka">Georgian</option>
-                            <option value="de">German</option>
-                            <option value="el">Greek</option>
-                            <option value="gu">Gujarati</option>
-                            <option value="iw">Hebrew</option>
-                            <option value="hi">Hindi</option>
-                            <option value="hu">Hungarian</option>
-                            <option value="is">Icelandic</option>
-                            <option value="id">Indonesian</option>
-                            <option value="ga">Irish</option>
-                            <option value="it">Italian</option>
-                            <option value="ja">Japanese</option>
-                            <option value="kn">Kannada</option>
-                            <option value="ko">Korean</option>
-                            <option value="la">Latin</option>
-                            <option value="lv">Latvian</option>
-                            <option value="lt">Lithuanian</option>
-                            <option value="mk">Macedonian</option>
-                            <option value="ms">Malay</option>
-                            <option value="mt">Maltese</option>
-                            <option value="no">Norwegian</option>
-                            <option value="fa">Persian</option>
-                            <option value="pl">Polish</option>
-                            <option value="pt">Portuguese</option>
-                            <option value="ro">Romanian</option>
-                            <option value="ru">Russian</option>
-                            <option value="sr">Serbian</option>
-                            <option value="sk">Slovak</option>
-                            <option value="sl">Slovenian</option>
-                            <option value="es">Spanish</option>
-                            <option value="sw">Swahili</option>
-                            <option value="sv">Swedish</option>
-                            <option value="ta">Tamil</option>
-                            <option value="te">Telugu</option>
-                            <option value="th">Thai</option>
-                            <option value="tr">Turkish</option>
-                            <option value="uk">Ukrainian</option>
-                            <option value="ur">Urdu</option>
-                            <option value="vi">Vietnamese</option>
-                            <option value="cy">Welsh</option>
-                            <option value="yi">Yiddish</option>
-                        </select>
-                    </td>
-                    <td>
-                        <button onclick="onProcess(false);">Transcribe</button>
-                    </td>
-                    <td>
-                        <button onclick="onProcess(true);">Translate</button>
-                    </td>
-                </tr>
-            </table>
-
-            <br>
-
-            <!-- textarea with height filling the rest of the page -->
-            <textarea id="output" rows="20"></textarea>
-
-            <br><br>
-
-            <div class="cell-version">
-                <span>
-                    |
-                    Build time: <span class="nav-link">@GIT_DATE@</span> |
-                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
-                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
-                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
-                </span>
-            </div>
-        </div>
-
-        <script type="text/javascript" src="helpers.js"></script>
-        <script type='text/javascript'>
-            // TODO: convert audio buffer to WAV
-            function setAudio(audio) {
-                //if (audio) {
-                //    // convert to 16-bit PCM
-                //    var blob = new Blob([audio], { type: 'audio/wav' });
-                //    var url = URL.createObjectURL(blob);
-                //    document.getElementById('source').src = url;
-                //    document.getElementById('audio').hidden = false;
-                //    document.getElementById('audio').loop = false;
-                //    document.getElementById('audio').load();
-                //} else {
-                //    document.getElementById('audio').hidden = true;
-                //}
-            }
-
-            function changeInput(input) {
-                if (input == 'file') {
-                    document.getElementById('input_file').style.display = 'block';
-                    document.getElementById('input_mic' ).style.display = 'none';
-                    document.getElementById('progress'  ).style.display = 'none';
-                } else {
-                    document.getElementById('input_file').style.display = 'none';
-                    document.getElementById('input_mic' ).style.display = 'block';
-                    document.getElementById('progress'  ).style.display = 'block';
-                }
-            }
-
-            var Module = {
-                print: printTextarea,
-                printErr: printTextarea,
-                setStatus: function(text) {
-                    printTextarea('js: ' + text);
-                },
-                monitorRunDependencies: function(left) {
-                }
-            };
-
-            // web audio context
-            var context = null;
-
-            // audio data
-            var audio = null;
-
-            // the whisper instance
-            var instance = null;
-            var model_whisper = '';
-
-            // helper function
-            function convertTypedArray(src, type) {
-                var buffer = new ArrayBuffer(src.byteLength);
-                var baseView = new src.constructor(buffer).set(src);
-                return new type(buffer);
-            }
-
-            //
-            // load model
-            //
-
-            let dbVersion = 1
-            let dbName    = 'whisper.ggerganov.com';
-            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
-
-            function storeFS(fname, buf) {
-                // write to WASM file using FS_createDataFile
-                // if the file exists, delete it
-                try {
-                    Module.FS_unlink(fname);
-                } catch (e) {
-                    // ignore
-                }
-
-                Module.FS_createDataFile("/", fname, buf, true, true);
-
-                model_whisper = fname;
-
-                document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
-
-                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
-            }
-
-            function loadFile(event, fname) {
-                var file = event.target.files[0] || null;
-                if (file == null) {
-                    return;
-                }
-
-                printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
-                printTextarea('loadFile: please wait ...');
-
-                var reader = new FileReader();
-                reader.onload = function(event) {
-                    var buf = new Uint8Array(reader.result);
-                    storeFS(fname, buf);
-                }
-                reader.readAsArrayBuffer(file);
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('fetch-whisper-tiny'   ).style.display = 'none';
-                document.getElementById('fetch-whisper-base'   ).style.display = 'none';
-                document.getElementById('whisper-file'         ).style.display = 'none';
-                document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
-            }
-
-            function loadWhisper(model) {
-                let urls = {
-                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'tiny':    'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
-                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                    'base':    'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
-                };
-
-                let sizes = {
-                    'tiny.en': 75,
-                    'tiny':    75,
-                    'base.en': 142,
-                    'base':    142,
-                };
-
-                let url     = urls[model];
-                let dst     = 'whisper.bin';
-                let size_mb = sizes[model];
-
-                model_whisper = model;
-
-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('fetch-whisper-tiny'   ).style.display = 'none';
-                document.getElementById('fetch-whisper-base'   ).style.display = 'none';
-                document.getElementById('whisper-file'         ).style.display = 'none';
-                document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;
-
-                cbProgress = function(p) {
-                    let el = document.getElementById('fetch-whisper-progress');
-                    el.innerHTML = Math.round(100*p) + '%';
-                };
-
-                cbCancel = function() {
-                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-tiny'   ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base'   ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('whisper-file'         ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = '';
-                };
-
-                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-            }
-
-            //
-            // audio file
-            //
-
-            const kMaxAudio_s = 120;
-            const kSampleRate = 16000;
-
-            window.AudioContext = window.AudioContext || window.webkitAudioContext;
-            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
-
-            function loadAudio(event) {
-                if (!context) {
-                    context = new AudioContext({
-                        sampleRate: kSampleRate,
-                        channelCount: 1,
-                        echoCancellation: false,
-                        autoGainControl:  true,
-                        noiseSuppression: true,
-                    });
-                }
-
-                var file = event.target.files[0] || null;
-                if (file == null) {
-                    return;
-                }
-
-                printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
-                printTextarea('js: please wait ...');
-
-                var reader = new FileReader();
-                reader.onload = function(event) {
-                    var buf = new Uint8Array(reader.result);
-
-                    context.decodeAudioData(buf.buffer, function(audioBuffer) {
-                        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
-                        var source = offlineContext.createBufferSource();
-                        source.buffer = audioBuffer;
-                        source.connect(offlineContext.destination);
-                        source.start(0);
-
-                        offlineContext.startRendering().then(function(renderedBuffer) {
-                            audio = renderedBuffer.getChannelData(0);
-                            printTextarea('js: audio loaded, size: ' + audio.length);
-
-                            // truncate to first 30 seconds
-                            if (audio.length > kMaxAudio_s*kSampleRate) {
-                                audio = audio.slice(0, kMaxAudio_s*kSampleRate);
-                                printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
-                            }
-
-                            setAudio(audio);
-                        });
-                    }, function(e) {
-                        printTextarea('js: error decoding audio: ' + e);
-                        audio = null;
-                        setAudio(audio);
-                    });
-                }
-                reader.readAsArrayBuffer(file);
-            }
-
-            //
-            // microphone
-            //
-
-            var mediaRecorder = null;
-            var doRecording = false;
-            var startTime = 0;
-
-            function stopRecording() {
-                doRecording = false;
-            }
-
-            // record up to kMaxAudio_s seconds of audio from the microphone
-            // check if doRecording is false every 1000 ms and stop recording if so
-            // update progress information
-            function startRecording() {
-                if (!context) {
-                    context = new AudioContext({
-                        sampleRate: kSampleRate,
-                        channelCount: 1,
-                        echoCancellation: false,
-                        autoGainControl:  true,
-                        noiseSuppression: true,
-                    });
-                }
-
-                document.getElementById('start').disabled = true;
-                document.getElementById('stop').disabled = false;
-
-                document.getElementById('progress-bar').style.width = '0%';
-                document.getElementById('progress-text').innerHTML = '0%';
-
-                doRecording = true;
-                startTime = Date.now();
-
-                var chunks = [];
-                var stream = null;
-
-                navigator.mediaDevices.getUserMedia({audio: true, video: false})
-                    .then(function(s) {
-                        stream = s;
-                        mediaRecorder = new MediaRecorder(stream);
-                        mediaRecorder.ondataavailable = function(e) {
-                            chunks.push(e.data);
-                        };
-                        mediaRecorder.onstop = function(e) {
-                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
-                            chunks = [];
-
-                            document.getElementById('start').disabled = false;
-                            document.getElementById('stop').disabled = true;
-
-                            var reader = new FileReader();
-                            reader.onload = function(event) {
-                                var buf = new Uint8Array(reader.result);
-
-                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
-                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
-                                    var source = offlineContext.createBufferSource();
-                                    source.buffer = audioBuffer;
-                                    source.connect(offlineContext.destination);
-                                    source.start(0);
-
-                                    offlineContext.startRendering().then(function(renderedBuffer) {
-                                        audio = renderedBuffer.getChannelData(0);
-                                        printTextarea('js: audio recorded, size: ' + audio.length);
-
-                                        // truncate to first 30 seconds
-                                        if (audio.length > kMaxAudio_s*kSampleRate) {
-                                            audio = audio.slice(0, kMaxAudio_s*kSampleRate);
-                                            printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
-                                        }
-                                        setAudio(audio);
-                                    });
-                                }, function(e) {
-                                    printTextarea('js: error decoding audio: ' + e);
-                                    audio = null;
-                                    setAudio(audio);
-                                });
-                            }
-
-                            reader.readAsArrayBuffer(blob);
-                        };
-                        mediaRecorder.start();
-                    })
-                    .catch(function(err) {
-                        printTextarea('js: error getting audio stream: ' + err);
-                    });
-
-                var interval = setInterval(function() {
-                    if (!doRecording) {
-                        clearInterval(interval);
-                        mediaRecorder.stop();
-                        stream.getTracks().forEach(function(track) {
-                            track.stop();
-                        });
-                    }
-
-                    document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%';
-                    document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%';
-                }, 1000);
-
-                printTextarea('js: recording ...');
-
-                setTimeout(function() {
-                    if (doRecording) {
-                        printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
-                        stopRecording();
-                    }
-                }, kMaxAudio_s*1000);
-            }
-
-            //
-            // transcribe
-            //
-
-            function onProcess(translate) {
-                if (!instance) {
-                    instance = Module.init('whisper.bin');
-
-                    if (instance) {
-                        printTextarea("js: whisper initialized, instance: " + instance);
-                        document.getElementById('model').innerHTML = 'Model loaded: ' + model_whisper;
-                    }
-                }
-
-                if (!instance) {
-                    printTextarea("js: failed to initialize whisper");
-                    return;
-                }
-
-                if (!audio) {
-                    printTextarea("js: no audio data");
-                    return;
-                }
-
-                if (instance) {
-                    printTextarea('');
-                    printTextarea('js: processing - this might take a while ...');
-                    printTextarea('');
-
-                    setTimeout(function() {
-                        var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
-                        console.log('js: full_default returned: ' + ret);
-                        if (ret) {
-                            printTextarea("js: whisper returned: " + ret);
-                        }
-                    }, 100);
-                }
-            }
-        </script>
-        <script type="text/javascript" src="main.js"></script>
-    </body>
-</html>
--- a/examples/yt-wsp.sh
+++ b/examples/yt-wsp.sh
@ -1,147 +0,0 @@
-#!/usr/bin/env bash
-
-# Small shell script to more easily automatically download and transcribe live stream VODs.
-# This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp
-# Use `./examples/yt-wsp.sh help` to print help info.
-#
-# Sample usage:
-#
-#   git clone https://github.com/ggerganov/whisper.cpp
-#   cd whisper.cpp
-#   make
-#   ./examples/yt-wsp.sh https://www.youtube.com/watch?v=1234567890
-#
-
-# MIT License
-
-# Copyright (c) 2022 Daniils Petrovs
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-set -Eeuo pipefail
-
-# Settings - each one can be overridden through the environment.
-# You can find how to download models in the OG repo: https://github.com/ggerganov/whisper.cpp/#usage
-
-# Set to a multilingual model if you want to translate from foreign lang to en
-: "${MODEL_PATH:=models/ggml-base.en.bin}"
-# Where to find the whisper.cpp executable
-: "${WHISPER_EXECUTABLE:=whisper}"
-# Set to desired lang to translate from
-: "${WHISPER_LANG:=en}"
-
-# Print the first argument to stderr, interpreting backslash escapes.
-# Without an argument an empty line is printed.
-msg() {
-    echo -e "${1-}" 1>&2
-}
-
-# Delete the download directory and the intermediate audio / subtitle files.
-cleanup() {
-    msg "Cleaning up..."
-    rm -rf \
-        "${temp_dir}" \
-        "vod-resampled.wav" \
-        "vod-resampled.wav.srt"
-}
-
-# Print usage information to stdout.
-print_help() {
-    printf '%s\n' \
-        "Usage: ./examples/yt-wsp.sh <video_url>" \
-        "See configurable env variables in the script" \
-        "This will produce an MP4 muxed file called res.mp4 in the working directory" \
-        "Requirements: ffmpeg yt-dlp whisper" \
-        "Whisper needs to be built into the main binary with make, then you can rename it to something like 'whisper' and add it to your PATH for convenience." \
-        "E.g. in the root of Whisper.cpp, run: 'make && cp ./main /usr/local/bin/whisper'"
-}
-
-# Make sure ffmpeg, yt-dlp and a whisper executable are available; exit with
-# status 1 otherwise. When $WHISPER_EXECUTABLE cannot be found, it is changed
-# to "./main" and that is tried instead.
-check_requirements() {
-    command -v ffmpeg >/dev/null 2>&1 || {
-        echo "ffmpeg is required (https://ffmpeg.org)."
-        exit 1
-    }
-
-    command -v yt-dlp >/dev/null 2>&1 || {
-        echo "yt-dlp is required (https://github.com/yt-dlp/yt-dlp)."
-        exit 1
-    }
-
-    # the configured executable exists: nothing else to check
-    if command -v "$WHISPER_EXECUTABLE" >/dev/null 2>&1; then
-        return 0
-    fi
-
-    # fall back to a binary in the current directory
-    WHISPER_EXECUTABLE="./main"
-    if command -v "$WHISPER_EXECUTABLE" >/dev/null 2>&1; then
-        return 0
-    fi
-
-    printf '%s\n' \
-        "Whisper is required (https://github.com/ggerganov/whisper.cpp):" \
-        "Sample usage:" \
-        "" \
-        "  git clone https://github.com/ggerganov/whisper.cpp" \
-        "  cd whisper.cpp" \
-        "  make" \
-        "  ./examples/yt-wsp.sh https://www.youtube.com/watch?v=1234567890" \
-        ""
-    exit 1
-}
-
-# Main flow: download the VOD, extract 16 kHz mono audio, transcribe it to an
-# .srt file and mux the subtitles back into res.mp4.
-
-if [[ $# -lt 1 ]]; then
-    print_help
-    exit 1
-fi
-
-if [[ "$1" == "help" ]]; then
-    print_help
-    exit 0
-fi
-
-source_url="$1"
-
-check_requirements
-
-# Work in a private scratch directory: cleanup() removes ${temp_dir}
-# recursively, so it must never point at a directory the user already had.
-temp_dir="$(mktemp -d "${TMPDIR:-/tmp}/yt-wsp.XXXXXX")"
-
-# Also remove the intermediate files when a step fails and `set -e` aborts.
-trap cleanup EXIT
-
-msg "Downloading VOD..."
-
-# Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] for members only VODs
-yt-dlp \
-    -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
-    --embed-thumbnail \
-    --embed-chapters \
-    --xattrs \
-    "${source_url}" -o "${temp_dir}/vod.mp4"
-
-msg "Extracting audio and resampling..."
-
-ffmpeg -i "${temp_dir}/vod.mp4" \
-    -hide_banner \
-    -loglevel error \
-    -ar 16000 \
-    -ac 1 \
-    -c:a \
-    pcm_s16le -y "vod-resampled.wav"
-
-msg "Transcribing to subtitle file..."
-msg "Whisper specified at: ${WHISPER_EXECUTABLE}"
-
-# quoted so that a path containing spaces is run as a single command
-"$WHISPER_EXECUTABLE" \
-    -m "${MODEL_PATH}" \
-    -l "${WHISPER_LANG}" \
-    -f "vod-resampled.wav" \
-    -t 8 \
-    -osrt \
-    --translate
-
-msg "Embedding subtitle track..."
-
-ffmpeg -i "${temp_dir}/vod.mp4" \
-    -hide_banner \
-    -loglevel error \
-    -i "vod-resampled.wav.srt" \
-    -c copy \
-    -c:s mov_text \
-    -y res.mp4
-
-# clean up now and disarm the trap so the cleanup does not run a second time
-cleanup
-trap - EXIT
-
-msg "Done! Your finished file is ready: res.mp4"
--- a/extra/bench-all.sh
+++ b/extra/bench-all.sh
@ -1,59 +0,0 @@
-#!/bin/bash
-
-# Helper script to run the bench tool on all models and print the results in share-able format
-
-printf "Usage: ./bench.sh [n_threads]\n"
-
-if [ -z "$1" ]; then
-    n_threads=4
-else
-    n_threads=$1
-fi
-
-models=( "tiny" "base" "small" "medium" "large" )
-
-printf "\n"
-printf "Running benchmark for all models\n"
-printf "This can take a while!\n"
-printf "\n"
-
-printf "| CPU | OS | Config | Model | Th | Load | Enc. | Commit |\n"
-printf "| --- | -- | ------ | ----- | -- | ---- | ---- | ------ |\n"
-
-for model in "${models[@]}"; do
-    # run once to heat-up the cache
-    ./bench -m ./models/ggml-$model.bin -t $n_threads 2>/dev/null 1>/dev/null
-
-    # actual run
-    # store stderr output in a variable in order to parse it later
-    output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)
-
-    # parse the output:
-    load_time=$(echo "$output" | grep "load time" | awk '{print $5}')
-    encode_time=$(echo "$output" | grep "encode time" | awk '{print $5}')
-    system_info=$(echo "$output" | grep "system_info")
-    n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
-
-    # floor to milliseconds
-    load_time=${load_time%.*}
-    encode_time=${encode_time%.*}
-
-    config=""
-
-    if [[ $system_info == *"AVX2 = 1"* ]]; then
-        config="$config AVX2"
-    fi
-
-    if [[ $system_info == *"NEON = 1"* ]]; then
-        config="$config NEON"
-    fi
-
-    if [[ $system_info == *"BLAS = 1"* ]]; then
-        config="$config BLAS"
-    fi
-
-    commit=$(git rev-parse --short HEAD)
-
-    printf "| <todo> | <todo> | $config | $model | $n_threads | $load_time | $encode_time | $commit |\n"
-done
-
--- a/extra/convert-all.sh
+++ b/extra/convert-all.sh
@ -1,8 +0,0 @@
-#!/bin/bash
-
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
-
-for model in "${models[@]}"; do
-    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
-    mv -v models/ggml-model.bin models/ggml-$model.bin
-done
--- a/extra/deploy-wasm.sh
+++ b/extra/deploy-wasm.sh
@ -1,31 +0,0 @@
-#!/bin/bash
-#
-# This is a helper script to deploy all WebAssembly examples to my node
-# Run from the build directory:
-#
-# cd build-em
-# ../extra/deploy-wasm.sh
-#
-
-# check if emcmake is available
-if ! command -v emcmake &> /dev/null
-then
-    echo "Error: emscripten environment is not set up"
-    exit
-fi
-
-emcmake cmake .. && make -j
-if [ $? -ne 0 ]; then
-    echo "Error: build failed"
-    exit
-fi
-
-# copy all wasm files to the node
-scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/         && scp bin/libmain.worker.js    root@linode0:/var/www/html/whisper/
-scp bin/stream.wasm/*  root@linode0:/var/www/html/whisper/stream/  && scp bin/libstream.worker.js  root@linode0:/var/www/html/whisper/stream/
-scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
-scp bin/talk.wasm/*    root@linode0:/var/www/html/whisper/talk/    && scp bin/libtalk.worker.js    root@linode0:/var/www/html/whisper/talk/
-scp bin/bench.wasm/*   root@linode0:/var/www/html/whisper/bench/   && scp bin/libbench.worker.js   root@linode0:/var/www/html/whisper/bench/
-
-echo "Done"
-exit
--- a/extra/sha-all.sh
+++ b/extra/sha-all.sh
@ -1,7 +0,0 @@
-#!/bin/bash
-
-# Compute the SHA1 of all model files in ./models/ggml-*.bin
-
-for f in ./models/ggml-*.bin; do
-    shasum "$f" -a 1
-done
--- a/ggml.c
+++ b/ggml.c
--- a/ggml.h
+++ b/ggml.h
@ -1,174 +1,5 @@
 #pragma once

-//
-// GGML Tensor Library
-//
-// This documentation is still a work in progress.
-// If you wish some specific topics to be covered, feel free to drop a comment:
-//
-//   https://github.com/ggerganov/whisper.cpp/issues/40
-//
-// ## Overview
-//
-// This library implements:
-//
-//  - a set of tensor operations
-//  - automatic differentiation
-//  - basic optimization algorithms
-//
-// The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes,
-// but is not limited to, the following:
-//
-//  - linear regression
-//  - support vector machines
-//  - neural networks
-//
-// The library allows the user to define a certain function using the available tensor operations. This function
-// definition is represented internally via a computation graph. Each tensor operation in the function definition
-// corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
-// function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
-// using one of the available optimization algorithms.
-//
-// For example, here we define the function: f(x) = a*x^2 + b
-//
-//   {
-//       struct ggml_init_params params = {
-//           .mem_size   = 16*1024*1024,
-//           .mem_buffer = NULL,
-//       };
-//
-//       // memory allocation happens here
-//       struct ggml_context * ctx = ggml_init(params);
-//
-//       struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-//
-//       ggml_set_param(ctx, x); // x is an input variable
-//
-//       struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-//       struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-//       struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
-//       struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
-//
-//       ...
-//   }
-//
-// Notice that the function definition above does not involve any actual computation. The computation is performed only
-// when the user explicitly requests it. For example, to compute the function's value at x = 2.0:
-//
-//   {
-//       ...
-//
-//       struct ggml_cgraph gf = ggml_build_forward(f);
-//
-//       // set the input variable and parameter values
-//       ggml_set_f32(x, 2.0f);
-//       ggml_set_f32(a, 3.0f);
-//       ggml_set_f32(b, 4.0f);
-//
-//       ggml_graph_compute(ctx0, &gf);
-//
-//       printf("f = %f\n", ggml_get_f32_1d(f, 0));
-//
-//       ...
-//   }
-//
-// The actual computation is performed in the ggml_graph_compute() function.
-//
-// The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
-// ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
-// in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
-// and after defining the computation graph, call the ggml_used_mem() function to find out how much memory was
-// actually needed.
-//
-// The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
-// differentiation and optimization algorithms.
-//
-// The described approach allows to define the function graph once and then compute its forward or backward graphs
-// multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way
-// the user can avoid the memory allocation overhead at runtime.
-//
-// The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
-// citizens, but in theory the library can be extended to support FP8 and integer data types.
-//
-// Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary
-// and binary operations. Most of the available operations fall into one of these two categories. With time, it became
-// clear that the library needs to support more complex operations. The way to support these operations is not clear
-// yet, but a few examples are demonstrated in the following operations:
-//
-//   - ggml_permute()
-//   - ggml_conv_1d_1s()
-//   - ggml_conv_1d_2s()
-//
-// For each tensor operator, the library implements a forward and backward computation function. The forward function
-// computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
-// input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a
-// calculus class, or watch the following video:
-//
-//   What is Automatic Differentiation?
-//   https://www.youtube.com/watch?v=wG_nF1awSSY
-//
-//
-// ## Tensor data (struct ggml_tensor)
-//
-// The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
-// the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
-// pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
-//
-//   {
-//       struct ggml_tensor * c = ggml_add(ctx, a, b);
-//
-//       assert(c->src[0] == a);
-//       assert(c->src[1] == b);
-//   }
-//
-// The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
-// number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows
-// to store tensors that are not contiguous in memory, which is useful for operations such as transposition and
-// permutation. All tensor operations have to take the stride into account and not assume that the tensor is
-// contiguous in memory.
-//
-// The data of the tensor is accessed via the "data" pointer. For example:
-//
-//   {
-//       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
-//
-//       // a[1, 2] = 1.0f;
-//       *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
-//
-//       // a[2, 0] = 2.0f;
-//       *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
-//
-//       ...
-//   }
-//
-// Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used.
-//
-// ## The matrix multiplication operator (ggml_mul_mat)
-//
-// TODO
-//
-//
-// ## Multi-threading
-//
-// TODO
-//
-//
-// ## Overview of ggml.c
-//
-// TODO
-//
-//
-// ## SIMD optimizations
-//
-// TODO
-//
-//
-// ## Debugging ggml
-//
-// TODO
-//
-//
-
 #ifdef  __cplusplus
 extern "C" {
 #endif
@ -180,7 +11,7 @@ extern "C" {
 #define GGML_MAX_DIMS     4
 #define GGML_MAX_NODES    4096
 #define GGML_MAX_PARAMS   16
-#define GGML_MAX_CONTEXTS 64
+#define GGML_MAX_CONTEXTS 16
 #define GGML_MAX_OPT      4

 #ifdef __ARM_NEON
@ -190,8 +21,7 @@ typedef __fp16 ggml_fp16_t;
 typedef uint16_t ggml_fp16_t;
 #endif

-// convert FP16 <-> FP32
-float       ggml_fp16_to_fp32(ggml_fp16_t x);
+float ggml_fp16_to_fp32(ggml_fp16_t x);
 ggml_fp16_t ggml_fp32_to_fp16(float x);

 struct ggml_object;
@ -206,7 +36,6 @@ enum ggml_type {
    GGML_TYPE_COUNT,
 };

-// available tensor operations:
 enum ggml_op {
    GGML_OP_NONE = 0,

@ -307,7 +136,6 @@ struct ggml_init_params {
    void * mem_buffer; // if NULL, memory will be allocated internally
 };

-void    ggml_time_init(void); // call this once at the beginning of the program
 int64_t ggml_time_ms(void);
 int64_t ggml_time_us(void);
 int64_t ggml_cycles(void);
@ -719,19 +547,6 @@ enum ggml_opt_result ggml_opt(
        struct ggml_opt_params params,
        struct ggml_tensor * f);

-//
-// system info
-//
-
-int ggml_cpu_has_avx(void);
-int ggml_cpu_has_avx2(void);
-int ggml_cpu_has_avx512(void);
-int ggml_cpu_has_neon(void);
-int ggml_cpu_has_f16c(void);
-int ggml_cpu_has_fp16_va(void);
-int ggml_cpu_has_wasm_simd(void);
-int ggml_cpu_has_blas(void);
-
 #ifdef  __cplusplus
 }
 #endif
--- a/main.cpp
+++ b/main.cpp
@ -0,0 +1,243 @@
+#include "whisper.h"
+
+// third-party utilities
+// use your favorite implementations
+#define DR_WAV_IMPLEMENTATION
+#include "dr_wav.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <ctime>
+#include <string>
+#include <thread>
+#include <vector>
+
+// Format a timestamp as "MM:SS.mmm".
+//
+// t is expressed in units of 10 ms (i.e. 100 units per second):
+//
+//  500 -> 00:05.000
+//  550 -> 00:05.500
+// 6000 -> 01:00.000
+//
+// Minutes are not folded into hours, so values of one hour or more print a minute count above 59.
+std::string to_timestamp(int64_t t) {
+    int64_t sec  = t/100;
+    // the remainder is in units of 10 ms - scale it to milliseconds so that it matches the %03d field
+    int64_t msec = (t - sec*100)*10;
+    int64_t min  = sec/60;
+    sec = sec - min*60;
+
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
+
+    return std::string(buf);
+}
+
+// command-line parameters
+//
+// The initializers below are the values used when the corresponding option is not given.
+struct whisper_params {
+    int32_t seed      = -1; // RNG seed, not used currently
+
+    // std::thread::hardware_concurrency() is allowed to return 0 when the value cannot be determined,
+    // so clamp the default to [1, 4] instead of ending up with zero threads
+    int32_t n_threads = std::max(1, std::min(4, (int32_t) std::thread::hardware_concurrency()));
+    int32_t offset_ms = 0; // -o: offset in milliseconds
+
+    bool verbose              = false; // -v
+    bool translate            = false; // --translate: translate from source language to english
+    bool print_special_tokens = false; // -ps
+    bool no_timestamps        = false; // -nt: do not print timestamps
+
+    std::string language  = "en";                      // -l: spoken language
+    std::string model     = "models/ggml-base.en.bin"; // -m: model path
+
+    std::vector<std::string> fname_inp = {}; // input WAV files (-f or positional arguments)
+};
+
+void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
+
+// Parse the command-line arguments into params.
+//
+// Arguments that do not start with '-' are collected as input files. "-h" / "--help" prints the usage
+// and exits the process with status 0.
+//
+// Returns true on success. Returns false, after printing the reason and the usage to stderr, when an
+// argument is unknown, when an option is missing its value, or when whisper_lang_id() rejects the
+// requested language.
+//
+// Note: numeric values are converted with std::stoi, which throws on non-numeric input; that
+// exception is not handled here.
+bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+
+        if (arg[0] != '-') {
+            params.fname_inp.push_back(arg);
+            continue;
+        }
+
+        // options that consume the following argument as their value
+        const bool needs_value =
+            arg == "-s" || arg == "--seed"     ||
+            arg == "-t" || arg == "--threads"  ||
+            arg == "-o" || arg == "--offset"   ||
+            arg == "-l" || arg == "--language" ||
+            arg == "-m" || arg == "--model"    ||
+            arg == "-f" || arg == "--file";
+
+        // argv[argc] is a null pointer - without this check argv[++i] below would hand it to
+        // std::stoi / std::string when the option is the last argument
+        if (needs_value && i + 1 >= argc) {
+            fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
+            whisper_print_usage(argc, argv, params);
+            return false;
+        }
+
+        if (arg == "-s" || arg == "--seed") {
+            params.seed = std::stoi(argv[++i]);
+        } else if (arg == "-t" || arg == "--threads") {
+            params.n_threads = std::stoi(argv[++i]);
+        } else if (arg == "-o" || arg == "--offset") {
+            params.offset_ms = std::stoi(argv[++i]);
+        } else if (arg == "-v" || arg == "--verbose") {
+            params.verbose = true;
+        } else if (arg == "--translate") {
+            params.translate = true;
+        } else if (arg == "-l" || arg == "--language") {
+            params.language = argv[++i];
+            if (whisper_lang_id(params.language.c_str()) == -1) {
+                fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
+                whisper_print_usage(argc, argv, params);
+                return false; // report the failure to the caller instead of exiting with status 0
+            }
+        } else if (arg == "-ps" || arg == "--print_special") {
+            params.print_special_tokens = true;
+        } else if (arg == "-nt" || arg == "--no_timestamps") {
+            params.no_timestamps = true;
+        } else if (arg == "-m" || arg == "--model") {
+            params.model = argv[++i];
+        } else if (arg == "-f" || arg == "--file") {
+            params.fname_inp.push_back(argv[++i]);
+        } else if (arg == "-h" || arg == "--help") {
+            // asking for help is not an error
+            whisper_print_usage(argc, argv, params);
+            exit(0);
+        } else {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            whisper_print_usage(argc, argv, params);
+            return false; // report the failure to the caller instead of exiting with status 0
+        }
+    }
+
+    return true;
+}
+
+// Write the help text to stderr: a usage line built from argv[0], followed by the list of supported
+// options. The threads, offset, language and model entries show the values currently held in params.
+void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+    (void) argc; // only argv[0] is needed
+
+    FILE * out = stderr;
+    const char * prog = argv[0];
+
+    fputs("\n", out);
+    fprintf(out, "usage: %s [options] file0.wav file1.wav ...\n", prog);
+    fputs("\n", out);
+    fputs("options:\n", out);
+    fputs("  -h,       --help           show this help message and exit\n", out);
+    fputs("  -s SEED,  --seed SEED      RNG seed (default: -1)\n", out);
+    fprintf(out, "  -t N,     --threads N      number of threads to use during computation (default: %d)\n", params.n_threads);
+    fprintf(out, "  -o N,     --offset N       offset in milliseconds (default: %d)\n", params.offset_ms);
+    fputs("  -v,       --verbose        verbose output\n", out);
+    fputs("            --translate      translate from source language to english\n", out);
+    fputs("  -ps,      --print_special  print special tokens\n", out);
+    fputs("  -nt,      --no_timestamps  do not print timestamps\n", out);
+    fprintf(out, "  -l LANG,  --language LANG  spoken language (default: %s)\n", params.language.c_str());
+    fprintf(out, "  -m FNAME, --model FNAME    model path (default: %s)\n", params.model.c_str());
+    fputs("  -f FNAME, --file FNAME     input WAV file path\n", out);
+    fputs("\n", out);
+}
+
+// Transcribe every input WAV file with the model given on the command line.
+//
+// Exit codes:
+//   0 - success
+//   1 - invalid arguments or no input files
+//   2 - the WAV file could not be opened
+//   3 - the WAV file is neither mono nor stereo
+//   4 - the WAV file is not sampled at WHISPER_SAMPLE_RATE
+//   5 - the WAV file is not 16-bit
+//   6 - whisper_full() failed
+//   7 - the whisper context could not be created
+int main(int argc, char ** argv) {
+    whisper_params params;
+
+    if (whisper_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    if (params.seed < 0) {
+        params.seed = time(NULL);
+    }
+
+    if (params.fname_inp.empty()) {
+        fprintf(stderr, "error: no input files specified\n");
+        whisper_print_usage(argc, argv, params);
+        return 1;
+    }
+
+    // whisper init
+
+    struct whisper_context * ctx = whisper_init(params.model.c_str());
+
+    // every use of ctx below dereferences it, so bail out early if the context was not created
+    if (ctx == nullptr) {
+        fprintf(stderr, "%s: failed to initialize whisper context from '%s'\n", argv[0], params.model.c_str());
+        return 7;
+    }
+
+    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
+        const auto fname_inp = params.fname_inp[f];
+
+        // WAV input
+        std::vector<float> pcmf32;
+        {
+            drwav wav;
+            if (!drwav_init_file(&wav, fname_inp.c_str(), NULL)) {
+                fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], fname_inp.c_str());
+                whisper_print_usage(argc, argv, {});
+                whisper_free(ctx);
+                return 2;
+            }
+
+            // from here on the WAV handle is open and has to be released on every exit path
+
+            if (wav.channels != 1 && wav.channels != 2) {
+                fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
+                drwav_uninit(&wav);
+                whisper_free(ctx);
+                return 3;
+            }
+
+            if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
+                fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
+                drwav_uninit(&wav);
+                whisper_free(ctx);
+                return 4;
+            }
+
+            if (wav.bitsPerSample != 16) {
+                fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
+                drwav_uninit(&wav);
+                whisper_free(ctx);
+                return 5;
+            }
+
+            // copy what is needed out of the handle before it is released
+            const int n          = wav.totalPCMFrameCount;
+            const int n_channels = wav.channels;
+
+            std::vector<int16_t> pcm16;
+            pcm16.resize(n*n_channels);
+            drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
+            drwav_uninit(&wav);
+
+            // convert to mono, float
+            pcmf32.resize(n);
+            if (n_channels == 1) {
+                for (int i = 0; i < n; i++) {
+                    pcmf32[i] = float(pcm16[i])/32768.0f;
+                }
+            } else {
+                // average the two interleaved channels
+                for (int i = 0; i < n; i++) {
+                    pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
+                }
+            }
+        }
+
+        // print some info about the processing
+        {
+            printf("\n");
+            if (!whisper_is_multilingual(ctx)) {
+                if (params.language != "en" || params.translate) {
+                    params.language = "en";
+                    params.translate = false;
+                    printf("%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
+                }
+            }
+            printf("%s: processing '%s' (%d samples, %.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
+                    __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE, params.n_threads,
+                    params.language.c_str(),
+                    params.translate ? "translate" : "transcribe",
+                    params.no_timestamps ? 0 : 1);
+            printf("\n");
+        }
+
+        // run the inference
+        {
+            whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
+
+            wparams.print_realtime       = true;
+            wparams.print_progress       = false;
+            wparams.print_timestamps     = !params.no_timestamps;
+            wparams.print_special_tokens = params.print_special_tokens;
+            wparams.translate            = params.translate;
+            wparams.language             = params.language.c_str();
+            wparams.n_threads            = params.n_threads;
+            wparams.offset_ms            = params.offset_ms;
+
+            if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
+                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
+                whisper_free(ctx);
+                return 6;
+            }
+
+            // print result;
+            // (only taken when print_realtime is off - it is switched on a few lines above)
+            if (!wparams.print_realtime) {
+                printf("\n");
+
+                const int n_segments = whisper_full_n_segments(ctx);
+                for (int i = 0; i < n_segments; ++i) {
+                    const char * text = whisper_full_get_segment_text(ctx, i);
+
+                    if (params.no_timestamps) {
+                        printf ("%s", text);
+                        fflush(stdout);
+                    } else {
+                        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+                        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+
+                        printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
+                    }
+                }
+            }
+        }
+    }
+
+    whisper_print_timings(ctx);
+    whisper_free(ctx);
+
+    return 0;
+}
--- a/models/README.md
+++ b/models/README.md
@ -1,20 +1,17 @@
 ## Whisper model files in custom ggml format

 The [original Whisper PyTorch models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L27)
-have been converted to custom `ggml` format in order to be able to load them in C/C++. The conversion has been performed
-using the [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script. You can either obtain the original models and generate
-the `ggml` files yourself using the conversion script, or you can use the [download-ggml-model.sh](download-ggml-model.sh)
-script to download the already converted models. Currently, they are hosted on the following locations:
-
- https://huggingface.co/datasets/ggerganov/whisper.cpp
- https://ggml.ggerganov.com
+have been converted to custom `ggml` format in order to be able to load them in C/C++. The conversion has been performed using the
+[convert-pt-to-ggml.py](convert-pt-to-ggml.py) script. You can either obtain the original models and generate the `ggml` files
+yourself using the conversion script, or you can use the [download-ggml-model.sh](download-ggml-model.sh) script to download the
+already converted models.

 Sample usage:

 ```java
 $ ./download-ggml-model.sh base.en
 Downloading ggml model base.en ...
-models/ggml-base.en.bin          100%[=============================================>] 141.11M  5.41MB/s    in 22s
+models/ggml-base.en.bin          100%[=============================================>] 141.11M  5.41MB/s    in 22s     
 Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
 You can now use it like this:

@ -25,41 +22,7 @@ A third option to obtain the model files is to download them from Hugging Face:

 https://huggingface.co/datasets/ggerganov/whisper.cpp/tree/main

-## Available models
-
-| Model     | Disk   | Mem     | SHA                                        |
-| ---       | ---    | ---     | ---                                        |
-| tiny      |  75 MB | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| tiny.en   |  75 MB | ~390 MB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
-| base      | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| base.en   | 142 MB | ~500 MB | `137c40403d78fd54d454da0f9bd998f78703390c` |
-| small     | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| small.en  | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
-| medium    | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
-| large-v1  | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
-| large     | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
-
 ## Model files for testing purposes

-The model files prefixed with `for-tests-` are empty (i.e. do not contain any weights) and are used by the CI for
-testing purposes. They are directly included in this repository for convenience and the Github Actions CI uses them to
-run various sanitizer tests.
-
-## Fine-tuned models
-
-There are community efforts for creating fine-tuned Whisper models using extra training data. For example, this
-[blog post](https://huggingface.co/blog/fine-tune-whisper) describes a method for fine-tuning using Hugging Face (HF)
-Transformer implementation of Whisper. The produced models are in slightly different format compared to the original
-OpenAI format. To read the HF models you can use the [convert-h5-to-ggml.py](convert-h5-to-ggml.py) script like this:
-
-```bash
-git clone https://github.com/openai/whisper
-git clone https://github.com/ggerganov/whisper.cpp
-
-# clone HF fine-tuned model (this is just an example)
-git clone https://huggingface.co/openai/whisper-base.en
-
-# convert the model to ggml
-python3 ./whisper.cpp/models/convert-h5-to-ggml.py ./whisper-medium/ ./whisper .
-```
+The model files prefixed with `for-tests-` are empty (i.e. do not contain any weights) and are used by the CI for testing purposes.
+They are directly included in this repository for convenience and the Github Actions CI uses them to run various sanitizer tests.
--- a/models/convert-h5-to-ggml.py
+++ b/models/convert-h5-to-ggml.py
@ -1,212 +0,0 @@
-# Convert Hugging Face fine-tuned models to ggml format
-#
-# Usage:
-#
-#   git clone https://github.com/openai/whisper
-#   git clone https://github.com/ggerganov/whisper.cpp
-#   git clone https://huggingface.co/openai/whisper-medium
-#
-#   python3 ./whisper.cpp/models/convert-h5-to-ggml.py ./whisper-medium/ ./whisper .
-#
-# This script is similar to "convert-pt-to-ggml.py"
-#
-# For more info:
-#
-#   https://github.com/ggerganov/whisper.cpp/issues/157
-#
-
-import io
-import os
-import sys
-import struct
-import json
-import code
-import torch
-import numpy as np
-
-from transformers import WhisperForConditionalGeneration
-
-conv_map = {
-        'self_attn.k_proj'              : 'attn.key',
-        'self_attn.q_proj'              : 'attn.query',
-        'self_attn.v_proj'              : 'attn.value',
-        'self_attn.out_proj'            : 'attn.out',
-        'self_attn_layer_norm'          : 'attn_ln',
-        'encoder_attn.q_proj'           : 'cross_attn.query',
-        'encoder_attn.v_proj'           : 'cross_attn.value',
-        'encoder_attn.out_proj'         : 'cross_attn.out',
-        'encoder_attn_layer_norm'       : 'cross_attn_ln',
-        'fc1'                           : 'mlp.0',
-        'fc2'                           : 'mlp.2',
-        'final_layer_norm'              : 'mlp_ln',
-        'encoder.layer_norm.bias'       : 'encoder.ln_post.bias',
-        'encoder.layer_norm.weight'     : 'encoder.ln_post.weight',
-        'encoder.embed_positions.weight': 'encoder.positional_embedding',
-        'decoder.layer_norm.bias'       : 'decoder.ln.bias',
-        'decoder.layer_norm.weight'     : 'decoder.ln.weight',
-        'decoder.embed_positions.weight': 'decoder.positional_embedding',
-        'decoder.embed_tokens.weight'   : 'decoder.token_embedding.weight',
-        'proj_out.weight'               : 'decoder.proj.weight',
-        }
-
-# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
-def bytes_to_unicode():
-    """
-    Returns list of utf-8 byte and a corresponding list of unicode strings.
-    The reversible bpe codes work on unicode strings.
-    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
-    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-    This is a signficant percentage of your normal, say, 32K bpe vocab.
-    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
-    And avoids mapping to whitespace/control characters the bpe code barfs on.
-    """
-    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
-    cs = bs[:]
-    n = 0
-    for b in range(2**8):
-        if b not in bs:
-            bs.append(b)
-            cs.append(2**8+n)
-            n += 1
-    cs = [chr(n) for n in cs]
-    return dict(zip(bs, cs))
-
-if len(sys.argv) < 4:
-    print("Usage: convert-h5-to-ggml.py dir_model path-to-whisper-repo dir-output [use-f32]\n")
-    sys.exit(1)
-
-dir_model   = sys.argv[1]
-dir_whisper = sys.argv[2]
-dir_out     = sys.argv[3]
-
-with open(dir_model + "/vocab.json", "r") as f:
-    encoder = json.load(f)
-with open(dir_model + "/added_tokens.json", "r") as f:
-    encoder_added = json.load(f)
-with open(dir_model + "/config.json", "r") as f:
-    hparams = json.load(f)
-
-model = WhisperForConditionalGeneration.from_pretrained(dir_model)
-
-#code.interact(local=locals())
-
-n_mels = hparams["num_mel_bins"]
-with np.load(os.path.join(dir_whisper, "whisper/assets", "mel_filters.npz")) as f:
-    filters = torch.from_numpy(f[f"mel_{n_mels}"])
-
-dir_tokenizer = dir_model
-
-fname_out = dir_out + "/ggml-model.bin"
-
-with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f:
-    tokens = json.load(f)
-
-# use 16-bit or 32-bit floats
-use_f16 = True
-if len(sys.argv) > 4:
-    use_f16 = False
-    fname_out = dir_out + "/ggml-model-f32.bin"
-
-fout = open(fname_out, "wb")
-
-fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
-fout.write(struct.pack("i", hparams["vocab_size"]))
-fout.write(struct.pack("i", hparams["max_source_positions"]))
-fout.write(struct.pack("i", hparams["d_model"]))
-fout.write(struct.pack("i", hparams["encoder_attention_heads"]))
-fout.write(struct.pack("i", hparams["encoder_layers"]))
-fout.write(struct.pack("i", hparams["max_length"]))
-fout.write(struct.pack("i", hparams["d_model"]))
-fout.write(struct.pack("i", hparams["decoder_attention_heads"]))
-fout.write(struct.pack("i", hparams["decoder_layers"]))
-fout.write(struct.pack("i", hparams["num_mel_bins"]))
-fout.write(struct.pack("i", use_f16))
-
-fout.write(struct.pack("i", filters.shape[0]))
-fout.write(struct.pack("i", filters.shape[1]))
-for i in range(filters.shape[0]):
-    for j in range(filters.shape[1]):
-        fout.write(struct.pack("f", filters[i][j]))
-
-byte_encoder = bytes_to_unicode()
-byte_decoder = {v:k for k, v in byte_encoder.items()}
-
-fout.write(struct.pack("i", len(tokens)))
-
-tokens = sorted(tokens.items(), key=lambda x: x[1])
-for key in tokens:
-    text = bytearray([byte_decoder[c] for c in key[0]])
-    fout.write(struct.pack("i", len(text)))
-    fout.write(text)
-
-list_vars = model.state_dict()
-for name in list_vars.keys():
-    # this seems to not be used
-    # ref: https://github.com/huggingface/transformers/blob/9a5b84a0076a04fe9596da72e8668069d4f09ea0/src/transformers/models/whisper/modeling_whisper.py#L1099-L1106
-    if name == "proj_out.weight":
-        print('Skipping', name)
-        continue
-
-    src = name
-
-    nn = name
-    if name != "proj_out.weight":
-        nn = nn.split(".")[1:]
-    else:
-        nn = nn.split(".")
-
-    if nn[1] == "layers":
-        nn[1] = "blocks"
-        if ".".join(nn[3:-1]) == "encoder_attn.k_proj":
-            mapped = "attn.key" if nn[0] == "encoder" else "cross_attn.key"
-        else:
-            mapped = conv_map[".".join(nn[3:-1])]
-        name = ".".join(nn[:3] + [mapped] + nn[-1:])
-    else:
-        name = ".".join(nn)
-        name = conv_map[name] if name in conv_map else name
-
-    print(src, ' -> ', name)
-    data = list_vars[src].squeeze().numpy()
-    data = data.astype(np.float16)
-
-    # reshape conv bias from [n] to [n, 1]
-    if name == "encoder.conv1.bias" or \
-       name == "encoder.conv2.bias":
-        data = data.reshape(data.shape[0], 1)
-        print("  Reshaped variable: " + name + " to shape: ", data.shape)
-
-    n_dims = len(data.shape)
-    print(name, n_dims, data.shape)
-
-    # looks like the whisper models are in f16 by default
-    # so we need to convert the small tensors to f32 until we fully support f16 in ggml
-    # ftype == 0 -> float32, ftype == 1 -> float16
-    ftype = 1;
-    if use_f16:
-        if n_dims < 2 or \
-                name == "encoder.conv1.bias"   or \
-                name == "encoder.conv2.bias"   or \
-                name == "encoder.positional_embedding" or \
-                name == "decoder.positional_embedding":
-            print("  Converting to float32")
-            data = data.astype(np.float32)
-            ftype = 0
-    else:
-        data = data.astype(np.float32)
-        ftype = 0
-
-    # header
-    str = name.encode('utf-8')
-    fout.write(struct.pack("iii", n_dims, len(str), ftype))
-    for i in range(n_dims):
-        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
-    fout.write(str);
-
-    # data
-    data.tofile(fout)
-
-fout.close()
-
-print("Done. Output file: " + fname_out)
-print("")
--- a/models/download-ggml-model.cmd
+++ b/models/download-ggml-model.cmd
@ -1,64 +0,0 @@
-@echo off
-
-pushd %~dp0
-set models_path=%CD%
-for %%d in (%~dp0..) do set root_path=%%~fd
-popd
-
-set argc=0
-for %%x in (%*) do set /A argc+=1
-
-set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large
-
-if %argc% neq 1 (
-  echo.
-  echo Usage: download-ggml-model.cmd model
-  CALL :list_models
-  goto :eof
-)
-
-set model=%1
-
-for %%b in (%models%) do (
-  if "%%b"=="%model%" (
-    CALL :download_model
-    goto :eof
-  )
-)
-
-echo Invalid model: %model%
-CALL :list_models
-goto :eof
-
-:download_model
-echo Downloading ggml model %model%...
-
-cd %models_path%
-
-if exist "ggml-%model%.bin" (
-  echo Model %model% already exists. Skipping download.
-  goto :eof
-)
-
-PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Invoke-WebRequest -Uri https://ggml.ggerganov.com/ggml-model-whisper-%model%.bin -OutFile ggml-%model%.bin"
-
-if %ERRORLEVEL% neq 0 (
-  echo Failed to download ggml model %model%
-  echo Please try again later or download the original Whisper model files and convert them yourself.
-  goto :eof
-)
-
-echo Done! Model %model% saved in %root_path%\models\ggml-%model%.bin
-echo You can now use it like this:
-echo main.exe -m %root_path%\models\ggml-%model%.bin -f %root_path%\samples\jfk.wav
-
-goto :eof
-
-:list_models
-  echo.
-  echo Available models:
-  (for %%a in (%models%) do ( 
-    echo %%a 
-  ))
-  echo.
-  exit /b
--- a/samples/README.md
+++ b/samples/README.md
@ -1,6 +0,0 @@
-# Audio samples
-
-This folder contains various audio files used for testing.
-If you want to quickly get some more samples, simply run `make samples`. This will download several public audio files and convert them to appropriate 16-bit WAV format using `ffmpeg`
-
-https://github.com/ggerganov/whisper.cpp/blob/a09ce6e8899198015729ffc49ae10f67370906b1/Makefile#L104-L123
--- a/stream.cpp
+++ b/stream.cpp
@ -0,0 +1,315 @@
+// Real-time speech recognition of input from a microphone
+//
+// A very quick-n-dirty implementation serving mainly as a proof of concept.
+
+#include "whisper.h"
+
+// third-party utilities
+// use your favorite implementations
+#define DR_WAV_IMPLEMENTATION
+#include "dr_wav.h"
+
+#include <SDL.h>
+#include <SDL_audio.h>
+
+#include <cassert>
+#include <cstdio>
+#include <string>
+#include <thread>
+#include <vector>
+
+// Format a timestamp as "MM:SS.mmm".
+//
+// t is expressed in units of 10 ms, as the examples below show:
+//
+//  500 -> 00:05.000
+//  550 -> 00:05.500
+// 6000 -> 01:00.000
+//
+// Minutes are not folded into hours, so 75 minutes prints as "75:00.000".
+std::string to_timestamp(int64_t t) {
+    // convert to milliseconds first so the fractional part is printed at the
+    // right scale (the sub-second remainder of t is in 10 ms ticks, not ms)
+    const int64_t msec_total = t*10;
+
+    const int64_t min  = msec_total/(60*1000);
+    const int64_t sec  = (msec_total/1000)%60;
+    const int64_t msec = msec_total%1000;
+
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
+
+    return std::string(buf);
+}
+
+// Settings that can be overridden from the command line.
+// Each member is initialized to the value used when its option is not given.
+struct whisper_params {
+    // numeric settings
+    int32_t seed{-1};    // RNG seed, not used currently
+    int32_t n_threads{std::min(4, (int32_t) std::thread::hardware_concurrency())}; // capped at 4
+    int32_t step_ms{3000};
+
+    // switches
+    bool verbose{false};
+    bool translate{false};
+    bool no_context{true};
+    bool print_special_tokens{false};
+    bool no_timestamps{true};
+
+    // language and file paths
+    std::string language{"en"};
+    std::string model{"models/ggml-base.en.bin"};
+    std::string fname_inp{"samples/jfk.wav"};
+};
+
+void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
+
+// Parse the command-line arguments into params.
+//
+// Returns true when every argument was understood. Returns false, after
+// printing the problem and the usage text to stderr, when an option is
+// unknown, the language is unknown, or a value-taking option is the last
+// argument and therefore has no value. -h/--help prints the usage text and
+// exits the process with status 0.
+//
+// Numeric values are converted with std::stoi, which throws on non-numeric
+// input; that exception is not caught here.
+bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
+    for (int i = 1; i < argc; i++) {
+        const std::string arg = argv[i];
+
+        // options that consume the following argument as their value
+        const bool takes_value =
+            arg == "-s" || arg == "--seed"     ||
+            arg == "-t" || arg == "--threads"  ||
+            arg == "--step"                    ||
+            arg == "-l" || arg == "--language" ||
+            arg == "-m" || arg == "--model"    ||
+            arg == "-f" || arg == "--file";
+
+        // never read past the last argument: argv[argc] is a null pointer
+        if (takes_value && i + 1 >= argc) {
+            fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
+            whisper_print_usage(argc, argv, params);
+            return false;
+        }
+
+        if (arg == "-s" || arg == "--seed") {
+            params.seed = std::stoi(argv[++i]);
+        } else if (arg == "-t" || arg == "--threads") {
+            params.n_threads = std::stoi(argv[++i]);
+        } else if (arg == "--step") {
+            params.step_ms = std::stoi(argv[++i]);
+        } else if (arg == "-v" || arg == "--verbose") {
+            params.verbose = true;
+        } else if (arg == "--translate") {
+            params.translate = true;
+        } else if (arg == "-kc" || arg == "--keep-context") {
+            params.no_context = false;
+        } else if (arg == "-l" || arg == "--language") {
+            params.language = argv[++i];
+            if (whisper_lang_id(params.language.c_str()) == -1) {
+                fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
+                whisper_print_usage(argc, argv, params);
+                return false;
+            }
+        } else if (arg == "-ps" || arg == "--print_special") {
+            params.print_special_tokens = true;
+        } else if (arg == "-nt" || arg == "--no_timestamps") {
+            // note: no_timestamps already defaults to true, so this flag
+            // currently does not change anything
+            params.no_timestamps = true;
+        } else if (arg == "-m" || arg == "--model") {
+            params.model = argv[++i];
+        } else if (arg == "-f" || arg == "--file") {
+            params.fname_inp = argv[++i];
+        } else if (arg == "-h" || arg == "--help") {
+            whisper_print_usage(argc, argv, params);
+            exit(0);
+        } else {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            whisper_print_usage(argc, argv, params);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Print the list of supported command-line options to stderr.
+//
+// argv[0] is used as the program name; params supplies the values shown as
+// defaults. argc is accepted for symmetry with whisper_params_parse and is
+// not used.
+void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+    fprintf(stderr, "\n");
+    fprintf(stderr, "usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "options:\n");
+    fprintf(stderr, "  -h,       --help           show this help message and exit\n");
+    fprintf(stderr, "  -s SEED,  --seed SEED      RNG seed (default: -1)\n");
+    fprintf(stderr, "  -t N,     --threads N      number of threads to use during computation (default: %d)\n", params.n_threads);
+    fprintf(stderr, "            --step N         audio step size in milliseconds (default: %d)\n", params.step_ms);
+    fprintf(stderr, "  -v,       --verbose        verbose output\n");
+    fprintf(stderr, "            --translate      translate from source language to english\n");
+    // the parser accepts -kc/--keep-context, so that is the spelling documented here
+    fprintf(stderr, "  -kc,      --keep-context   keep context from earlier audio (default: false)\n");
+    fprintf(stderr, "  -ps,      --print_special  print special tokens\n");
+    fprintf(stderr, "  -nt,      --no_timestamps  do not print timestamps\n");
+    fprintf(stderr, "  -l LANG,  --language LANG  spoken language (default: %s)\n", params.language.c_str());
+    fprintf(stderr, "  -m FNAME, --model FNAME    model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "  -f FNAME, --file FNAME     input WAV file path (default: %s)\n", params.fname_inp.c_str());
+    fprintf(stderr, "\n");
+}
+
+//
+// SDL Audio capture
+//
+
+// Id of the opened SDL capture device; 0 while no device is open.
+SDL_AudioDeviceID g_dev_id_in = 0;
+
+// Initialize the SDL audio subsystem and open a capture device requesting
+// mono float32 audio at WHISPER_SAMPLE_RATE.
+//
+// capture_id: index of the capture device to open (as listed on stdout), or a
+//             negative value to open the default capture device.
+//
+// Returns true when a device was opened; its id is stored in g_dev_id_in.
+// Returns false if a device is already open, SDL could not be initialized, or
+// no capture device could be opened.
+bool audio_sdl_init(const int capture_id) {
+    if (g_dev_id_in) {
+        fprintf(stderr, "%s: already initialized\n", __func__);
+        return false;
+    }
+
+    SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
+
+    if (SDL_Init(SDL_INIT_AUDIO) < 0) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
+        return false; // this function returns bool: a non-zero value would signal success
+    }
+
+    SDL_SetHintWithPriority(SDL_HINT_AUDIO_RESAMPLING_MODE, "medium", SDL_HINT_OVERRIDE);
+
+    // list the available capture devices
+    {
+        int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
+        printf("%s: found %d capture devices:\n", __func__, nDevices);
+        for (int i = 0; i < nDevices; i++) {
+            printf("%s:    - Capture device #%d: '%s'\n", __func__, i, SDL_GetAudioDeviceName(i, SDL_TRUE));
+        }
+    }
+
+    SDL_AudioSpec capture_spec_requested;
+    SDL_AudioSpec capture_spec_obtained;
+
+    SDL_zero(capture_spec_requested);
+    SDL_zero(capture_spec_obtained);
+
+    capture_spec_requested.freq     = WHISPER_SAMPLE_RATE;
+    capture_spec_requested.format   = AUDIO_F32;
+    capture_spec_requested.channels = 1;
+    capture_spec_requested.samples  = 1024;
+
+    if (capture_id >= 0) {
+        // the name may be null for an index that is out of range; do not hand a
+        // null pointer to printf's %s
+        const char * device_name = SDL_GetAudioDeviceName(capture_id, SDL_TRUE);
+        printf("%s: attempt to open capture device %d : '%s' ...\n", __func__, capture_id, device_name ? device_name : "(unknown)");
+        g_dev_id_in = SDL_OpenAudioDevice(device_name, SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
+    } else {
+        printf("%s: attempt to open default capture device ...\n", __func__);
+        g_dev_id_in = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
+    }
+
+    if (!g_dev_id_in) {
+        printf("%s: couldn't open an audio device for capture: %s!\n", __func__, SDL_GetError());
+        g_dev_id_in = 0;
+        return false; // without a device there is nothing to capture from
+    }
+
+    printf("%s: obtained spec for input device (SDL Id = %d):\n", __func__, g_dev_id_in);
+    printf("%s:     - sample rate:       %d\n", __func__, capture_spec_obtained.freq);
+    printf("%s:     - format:            %d (required: %d)\n", __func__, capture_spec_obtained.format, capture_spec_requested.format);
+    printf("%s:     - channels:          %d (required: %d)\n", __func__, capture_spec_obtained.channels, capture_spec_requested.channels);
+    printf("%s:     - samples per frame: %d\n", __func__, capture_spec_obtained.samples);
+
+    return true;
+}
+
+///////////////////////////
+
+// Entry point: capture audio from the default capture device and transcribe
+// it in steps of params.step_ms milliseconds until an SDL_QUIT event arrives.
+//
+// Exit status: 0 on normal termination, 1 on invalid arguments or audio
+// initialization failure, 2 when the model cannot be loaded, 6 when the
+// inference fails.
+int main(int argc, char ** argv) {
+    whisper_params params;
+
+    if (whisper_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    // a non-positive step yields a non-positive sample count, which would make
+    // the wait loop below spin forever (the count is compared as unsigned)
+    if (params.step_ms <= 0) {
+        fprintf(stderr, "%s: the audio step must be positive (got %d ms)\n", __func__, params.step_ms);
+        return 1;
+    }
+
+    if (params.seed < 0) {
+        params.seed = time(NULL);
+    }
+
+    // init audio
+
+    if (!audio_sdl_init(-1)) {
+        fprintf(stderr, "%s: audio_sdl_init() failed!\n", __func__);
+        return 1;
+    }
+
+    // whisper init
+
+    struct whisper_context * ctx = whisper_init(params.model.c_str());
+
+    // NOTE(review): presumably whisper_init returns a null pointer when the
+    // model cannot be loaded - confirm against whisper.h
+    if (ctx == nullptr) {
+        fprintf(stderr, "%s: failed to initialize whisper context from '%s'\n", __func__, params.model.c_str());
+        return 2;
+    }
+
+    const int n_samples = (params.step_ms/1000.0)*WHISPER_SAMPLE_RATE;
+    const int n_samples_30s = 30*WHISPER_SAMPLE_RATE;
+    std::vector<float> pcmf32(n_samples_30s, 0.0f);
+    std::vector<float> pcmf32_old;
+
+    // print some info about the processing
+    {
+        printf("\n");
+        if (!whisper_is_multilingual(ctx)) {
+            if (params.language != "en" || params.translate) {
+                params.language = "en";
+                params.translate = false;
+                printf("%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
+            }
+        }
+        printf("%s: processing %d samples (%.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
+                __func__, n_samples, float(n_samples)/WHISPER_SAMPLE_RATE, params.n_threads,
+                params.language.c_str(),
+                params.translate ? "translate" : "transcribe",
+                params.no_timestamps ? 0 : 1);
+        printf("\n");
+    }
+
+    // start capturing
+    SDL_PauseAudioDevice(g_dev_id_in, 0);
+
+    bool is_running = true;
+
+    // main audio loop
+    while (is_running) {
+        // process SDL events:
+        SDL_Event event;
+        while (SDL_PollEvent(&event)) {
+            switch (event.type) {
+                case SDL_QUIT:
+                    is_running = false;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // wait until at least one step (params.step_ms) of new audio is queued
+        while (SDL_GetQueuedAudioSize(g_dev_id_in) < n_samples*sizeof(float)) {
+            SDL_Delay(1);
+        }
+        const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
+
+        // take one second from previous iteration
+        // TODO: better strategy
+        const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_30s/30 - n_samples_new));
+
+        //printf("processing: take = %d, new = %d, old = %d\n", n_samples_take, n_samples_new, (int) pcmf32_old.size());
+
+        pcmf32.resize(n_samples_new + n_samples_take);
+
+        // the tail of the previous buffer goes first ...
+        for (int i = 0; i < n_samples_take; i++) {
+            pcmf32[i] = pcmf32_old[pcmf32_old.size() - n_samples_take + i];
+        }
+
+        // ... followed by the newly captured samples
+        SDL_DequeueAudio(g_dev_id_in, pcmf32.data() + n_samples_take, n_samples_new*sizeof(float));
+
+        pcmf32_old = pcmf32;
+
+        // run the inference
+        {
+            whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
+
+            wparams.print_progress       = false;
+            wparams.print_special_tokens = params.print_special_tokens;
+            wparams.print_realtime       = false;
+            wparams.print_timestamps     = !params.no_timestamps;
+            wparams.translate            = params.translate;
+            wparams.no_context           = params.no_context;
+            wparams.language             = params.language.c_str();
+            wparams.n_threads            = params.n_threads;
+
+            if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
+                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
+                whisper_free(ctx); // do not leak the context on the error path
+                return 6;
+            }
+
+            // print result;
+            {
+                printf("\n");
+
+                const int n_segments = whisper_full_n_segments(ctx);
+                for (int i = 0; i < n_segments; ++i) {
+                    const char * text = whisper_full_get_segment_text(ctx, i);
+
+                    if (params.no_timestamps) {
+                        printf ("%s", text);
+                        fflush(stdout);
+                    } else {
+                        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+                        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+
+                        printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
+                    }
+                }
+            }
+        }
+    }
+
+    whisper_print_timings(ctx);
+    whisper_free(ctx);
+
+    // release the capture device and shut SDL down
+    SDL_CloseAudioDevice(g_dev_id_in);
+    SDL_Quit();
+
+    return 0;
+}
--- a/tests/.gitignore
+++ b/tests/.gitignore
@ -1,3 +0,0 @@
-*.wav
-*.ogg
-*.wav.txt
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Georgi Gerganov	4597c9c19b	wip : try to compress just mlp	2022-10-08 15:12:15 +03:00
Georgi Gerganov	4a4a754220	wip : experimenting	2022-10-08 14:08:43 +03:00
				`@ -1 +0,0 @@`
				"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance\|\|{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};