release : v1.5.4

fix : cuda order of synchronization when setting a buffer (ggml/679)
* fix : cuda order of synchronization when setting a buffer * also sync before memcpy --------- Co-authored-by: slaren <slarengh@gmail.com>
2025-07-04 16:30:58 +02:00 · 2024-01-05 17:11:27 +02:00 · 2024-01-05 17:01:59 +02:00 · 2024-01-05 16:31:30 +02:00 · 2024-01-05 16:31:20 +02:00 · 2024-01-04 16:28:30 +02:00
212 changed files with 56886 additions and 6510 deletions
--- a/.devops/main-cuda.Dockerfile
+++ b/.devops/main-cuda.Dockerfile
@ -0,0 +1,38 @@
 # Two-stage CUDA image: the "build" stage compiles the project with cuBLAS
 # enabled on the CUDA devel image; the "runtime" stage copies the resulting
 # /app tree onto the smaller CUDA runtime image and adds curl + ffmpeg.
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
 ARG CUDA_VERSION=12.3.1
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 WORKDIR /app
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable cuBLAS
 ENV WHISPER_CUBLAS=1
 # Install the compiler toolchain, then drop the apt caches to keep the layer small.
 RUN apt-get update && \
    apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 # Ref: https://stackoverflow.com/a/53464012
 # NOTE(review): CUDA_MAIN_VERSION is hard-coded separately from CUDA_VERSION
 # above; the two presumably need to be bumped together.
 ENV CUDA_MAIN_VERSION=12.3
 # Use the key=value form of ENV; the whitespace-separated form is legacy syntax.
 ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 COPY .. .
 RUN make
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 WORKDIR /app
 # Runtime tools only: curl and ffmpeg.
 RUN apt-get update && \
  apt-get install -y curl ffmpeg \
  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 # Bring over the whole build tree (binaries, scripts, samples) from the build stage.
 COPY --from=build /app /app
 # The container argument is passed to `bash -c` as a single command string.
 ENTRYPOINT [ "bash", "-c" ]
--- a/.devops/main.Dockerfile
+++ b/.devops/main.Dockerfile
@ -0,0 +1,19 @@
 # Two-stage CPU-only image: the "build" stage compiles the project with make,
 # the "runtime" stage copies the resulting /app tree and adds curl + ffmpeg.
 FROM ubuntu:22.04 AS build
 WORKDIR /app
 # Install the compiler toolchain, then drop the apt caches to keep the layer small.
 RUN apt-get update && \
  apt-get install -y build-essential \
  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 # NOTE(review): the source path is `..`; presumably this resolves to the
 # build-context root (same as `.`) - confirm.
 COPY .. .
 RUN make
 FROM ubuntu:22.04 AS runtime
 WORKDIR /app
 # Runtime tools only: curl and ffmpeg.
 RUN apt-get update && \
  apt-get install -y curl ffmpeg \
  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 # Bring over the whole build tree from the build stage.
 COPY --from=build /app /app
 # The container argument is passed to `bash -c` as a single command string.
 ENTRYPOINT [ "bash", "-c" ]
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -25,6 +25,7 @@ jobs:
          docker run --platform ${{ matrix.arch }} --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
            set -e
            apt update
            apt install -y build-essential libsdl2-dev
            make
@ -86,9 +87,10 @@ jobs:
          docker run --platform ${{ matrix.arch }} --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
            set -e
            apt update
            apt install -y build-essential cmake libsdl2-dev
-            cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
            make
            ctest -L gh --output-on-failure'
@ -113,9 +115,10 @@ jobs:
          docker run --platform ${{ matrix.arch }} --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
            set -e
            apt update
-            apt install -y build-essential cmake libsdl2-dev
+            apt install -y clang build-essential cmake libsdl2-dev
-            cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
            make
            ctest -L gh --output-on-failure'
@ -140,6 +143,7 @@ jobs:
          docker run --platform ${{ matrix.arch }} --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
            set -e
            apt update
            apt install -y build-essential cmake
            cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
@ -162,7 +166,7 @@ jobs:
            s2arc: x64
            jnaPath: win32-x86-64
          - sdl2: ON
-            s2ver: 2.26.0
+            s2ver: 2.28.5
    steps:
      - name: Clone
@ -182,7 +186,7 @@ jobs:
        run: >
          cmake -S . -B ./build -A ${{ matrix.arch }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
+          -DWHISPER_SDL2=${{ matrix.sdl2 }}
      - name: Build
        run: |
@ -217,13 +221,16 @@ jobs:
        sdl2: [ON]
        include:
          - arch: Win32
-            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
+            obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
            s2arc: x86
            clblast: OFF
          - arch: x64
-            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
+            obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
            s2arc: x64
            clblast: ON
            clver: 1.6.1
          - sdl2: ON
-            s2ver: 2.26.0
+            s2ver: 2.28.5
    steps:
      - name: Clone
@ -239,7 +246,7 @@ jobs:
          7z x blas.zip -oblas -y
          copy blas/include/cblas.h .
          copy blas/include/openblas_config.h .
-          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
+          echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
      - name: Fetch SDL2 and set SDL2_DIR
        if: matrix.sdl2 == 'ON'
@ -248,13 +255,26 @@ jobs:
          7z x sdl2.zip
          echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
      - name: Install OpenCL
        if: matrix.clblast == 'ON'
        run: vcpkg.exe --triplet=${{ matrix.arch }}-windows install opencl
      - name: Fetch CLBlast and set CLBlast_DIR
        if: matrix.clblast == 'ON'
        run: |
          C:/msys64/usr/bin/wget.exe -qO clblast.zip https://github.com/CNugteren/CLBlast/releases/download/${{ matrix.clver }}/CLBlast-${{ matrix.clver }}-windows-x64.zip
          7z x clblast.zip
          7z x CLBlast-${{ matrix.clver }}-windows-x64.7z
          echo "CLBlast_DIR=$env:GITHUB_WORKSPACE/CLBlast-${{ matrix.clver }}-windows-x64/lib/cmake/CLBlast" >> $env:GITHUB_ENV
      - name: Configure
        run: >
          cmake -S . -B ./build -A ${{ matrix.arch }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
+          -DWHISPER_OPENBLAS=${{ matrix.blas }}
-          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+          -DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
-          -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
+          -DWHISPER_SDL2=${{ matrix.sdl2 }}
          -DWHISPER_CLBLAST=${{ matrix.clblast }}
      - name: Build
        run: |
@ -263,17 +283,21 @@ jobs:
      - name: Copy libopenblas.dll
        if: matrix.blas == 'ON'
-        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+        run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
      - name: Copy SDL2.dll
        if: matrix.sdl2 == 'ON'
        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
      - name: Copy clblast.dll
        if: matrix.clblast == 'ON'
        run: copy "$env:CLBlast_DIR/../../clblast.dll" build/bin/${{ matrix.build }}
      - name: Upload binaries
        if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
        uses: actions/upload-artifact@v1
        with:
-          name: whisper-blas-bin-${{ matrix.arch }}
+          name: whisper-blas${{ matrix.clblast == 'ON' && '-clblast' || ''}}-bin-${{ matrix.arch }}
          path: build/bin/${{ matrix.build }}
  windows-cublas:
@ -285,11 +309,12 @@ jobs:
        arch: [x64]
        cublas: [ON]
        sdl2: [ON]
        cuda-toolkit: [12.2.0, 11.8.0]
        include:
          - arch: x64
            s2arc: x64
          - sdl2: ON
-            s2ver: 2.26.0
+            s2ver: 2.28.5
    steps:
      - name: Clone
@ -300,7 +325,9 @@ jobs:
      - name: Install CUDA Toolkit
        id: cuda-toolkit
-        uses: Jimver/cuda-toolkit@v0.2.10
+        uses: Jimver/cuda-toolkit@v0.2.11
        with:
          cuda: '${{ matrix.cuda-toolkit }}'
      - name: Fetch SDL2 and set SDL2_DIR
        if: matrix.sdl2 == 'ON'
@ -313,12 +340,20 @@ jobs:
        run: >
          cmake -S . -B ./build -A ${{ matrix.arch }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DWHISPER_CUBLAS=1
+          -DWHISPER_CUBLAS=${{ matrix.cublas }}
          -DWHISPER_SDL2=${{ matrix.sdl2 }}
-      - name: Build
+      - name: Build ${{ matrix.cuda-toolkit }}
        run: |
          cd ./build
-          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+          cmake --build . --config ${{ matrix.build }}
      - name: Copy CUDA DLLs
        run: >
          Copy-Item -PassThru
          -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
          -Include cudart64_*,cublas64_*,cublasLt64_*
          -Destination build/bin/${{ matrix.build }}
      - name: Copy SDL2.dll
        if: matrix.sdl2 == 'ON'
@ -328,7 +363,7 @@ jobs:
        if: matrix.sdl2 == 'ON'
        uses: actions/upload-artifact@v1
        with:
-          name: whisper-cublas-bin-${{ matrix.arch }}
+          name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
          path: build/bin/${{ matrix.build }}
  emscripten:
@ -396,6 +431,32 @@ jobs:
          cd examples/whisper.android
          ./gradlew assembleRelease --no-daemon
  # Builds the Java-based Android example (examples/whisper.android.java)
  # with Gradle on a Linux runner.
  android_java:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v3
      - name: set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: gradle
      - name: Setup Android SDK
        uses: android-actions/setup-android@v2
        with:
          api-level: 30
          build-tools-version: 30.0.3
      - name: Build
        # The wrapper script is made executable before it is invoked.
        run: |
          cd examples/whisper.android.java
          chmod +x ./gradlew
          ./gradlew assembleRelease
  java:
    needs: [ 'windows' ]
    runs-on: windows-latest
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -0,0 +1,57 @@
 # Builds the images described by the .devops Dockerfiles and publishes them
 # to the GitHub Container Registry (ghcr.io). Pull requests only build the
 # image; pushes to master also push it.
 name: Publish Docker image
 on:
  pull_request:
  push:
    branches:
      - master
 jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    # Skip draft pull requests.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      matrix:
        # One entry per image: tag suffix, Dockerfile path and target platform list.
        config:
          - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
          - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Despite the step name, the login target is ghcr.io.
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Immutable per-commit tag, e.g. main-<sha>; only produced on push events.
      - name: Build and push Docker image (versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          # The matrix key is `platform` (singular); reading `platforms`
          # would expand to an empty string and drop the platform list.
          platforms: ${{ matrix.config.platform }}
          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
          file: ${{ matrix.config.dockerfile }}
      # Moving tag, e.g. main; built on every event but pushed only on push events.
      - name: Build and push Docker image (tagged)
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name == 'push' }}
          platforms: ${{ matrix.config.platform }}
          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
          file: ${{ matrix.config.dockerfile }}
--- a/.gitignore
+++ b/.gitignore
@ -8,6 +8,7 @@
 .DS_Store
 build/
 build-coreml/
 build-em/
 build-debug/
 build-release/
@ -18,6 +19,11 @@ build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/
 # SPM
 .build/
 .swiftpm
 *.metallib
 /main
 /stream
 /command
@ -25,6 +31,7 @@ build-sanitize-thread/
 /talk-llama
 /bench
 /quantize
 /server
 /lsp
 arm_neon.h
@ -48,3 +55,7 @@ bindings/java/.idea/
 .idea/
 benchmark_results.csv
 cmake-build-debug/
 .cxx/
 .gradle/
 local.properties
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)
-project(whisper.cpp VERSION 1.4.2)
+project(whisper.cpp VERSION 1.5.4)
 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
@ -218,11 +218,17 @@ if (WHISPER_CUBLAS)
        add_compile_definitions(GGML_USE_CUBLAS)
        if (WHISPER_STATIC)
            if (WIN32)
                # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
                set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
            else ()
                set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
            endif()
        else()
            set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
        endif()
        set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
    else()
        message(FATAL_ERROR "cuBLAS not found")
    endif()
@ -338,8 +344,8 @@ else()
        endif()
    else()
        if (EMSCRIPTEN)
-            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
+            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -s TOTAL_STACK=5242880")
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
        else()
            if(NOT WHISPER_NO_AVX)
                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
@ -521,7 +527,13 @@ endif()
 if (GGML_SOURCES_CUDA)
    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
+    # Only configure ggml CUDA architectures if not globally set
    if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
        # Not overridden by user, so set defaults
        set(GGML_CUDA_ARCHITECTURES 52 61 70)
    endif()
    message(STATUS "GGML Configuring CUDA architectures ${GGML_CUDA_ARCHITECTURES}")
    set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES ${GGML_CUDA_ARCHITECTURES})
    set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
 endif()
@ -533,7 +545,7 @@ target_compile_definitions(${TARGET} PUBLIC
    ${WHISPER_EXTRA_FLAGS}
    )
-set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
+set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "ggml.h;whisper.h")
 include(GNUInstallDirs)
--- a/56
+++ b/56
@ -1,4 +1,4 @@
-default: main bench quantize
+default: main bench quantize server
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
@ -206,7 +206,7 @@ ifdef WHISPER_CUBLAS
 	CFLAGS      += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
 	CXXFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
-	LDFLAGS     += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
+	LDFLAGS     += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
 	WHISPER_OBJ += ggml-cuda.o
 	NVCC        = nvcc
 	NVCCFLAGS   = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
@ -307,7 +307,7 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
 ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
 	$(CC)  $(CFLAGS)   -c $< -o $@
-WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o
+WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o
 whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@ -331,14 +331,14 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
 WHISPER_OBJ += ggml-metal.o
 endif
-libwhisper.a: ggml.o $(WHISPER_OBJ)
+libwhisper.a: $(WHISPER_OBJ)
-	$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
+	$(AR) rcs libwhisper.a $(WHISPER_OBJ)
-libwhisper.so: ggml.o $(WHISPER_OBJ)
+libwhisper.so: $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
 clean:
-	rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
+	rm -f *.o main stream command talk talk-llama bench quantize server lsp libwhisper.a libwhisper.so
 #
 # Examples
@ -349,30 +349,33 @@ CC_SDL=`sdl2-config --cflags --libs`
 SRC_COMMON     = examples/common.cpp examples/common-ggml.cpp
 SRC_COMMON_SDL = examples/common-sdl.cpp
-main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
+main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
 	./main -h
-bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
+bench: examples/bench/bench.cpp $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp $(WHISPER_OBJ) -o bench $(LDFLAGS)
-quantize: examples/quantize/quantize.cpp ggml.o $(WHISPER_OBJ) $(SRC_COMMON)
+quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
-	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o quantize $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
-stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
-lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+command: examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
-talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
-talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
 talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
 #
 # Audio samples
@ -417,9 +420,10 @@ samples:
 .PHONY: medium.en
 .PHONY: medium
 .PHONY: large-v1
-.PHONY: large
+.PHONY: large-v2
 .PHONY: large-v3
-tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
+tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
 	bash ./models/download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
--- a/Package.swift
+++ b/Package.swift
@ -0,0 +1,59 @@
 // swift-tools-version:5.5
 import PackageDescription
 let package = Package(
    name: "whisper",
    platforms: [
        .macOS(.v12),
        .iOS(.v14),
        .watchOS(.v4),
        .tvOS(.v14)
    ],
    products: [
        .library(name: "whisper", targets: ["whisper"]),
    ],
    dependencies: [
        .package(url: "https://github.com/ggerganov/ggml.git", .branch("master"))
    ],
    targets: [
        .target(
            name: "whisper",
            dependencies: ["ggml"],
            path: ".",
            exclude: [
               "bindings",
               "cmake",
               "coreml",
               "examples",
               "extra",
               "models",
               "samples",
               "tests",
               "CMakeLists.txt",
               "ggml-cuda.cu",
               "ggml-cuda.h",
               "Makefile"
            ],
            sources: [
                "whisper.cpp",
            ],
            publicHeadersPath: "spm-headers",
            cSettings: [
                .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
                .define("GGML_USE_ACCELERATE"),
                .unsafeFlags(["-fno-objc-arc"]),
                .define("GGML_USE_METAL")
                // NOTE: NEW_LAPACK will require iOS version 16.4+
                // We should consider adding this in the future when we drop support for iOS 14
                // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
                // .define("ACCELERATE_NEW_LAPACK"),
                // .define("ACCELERATE_LAPACK_ILP64")
            ],
            linkerSettings: [
                .linkedFramework("Accelerate")
            ]
        )
    ],
    cxxLanguageStandard: .cxx11
 )
--- a/README.md
+++ b/README.md
@ -6,7 +6,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
-Beta: [v1.4.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.2) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
@ -16,12 +16,10 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
 - [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
 - Low memory usage (Flash Attention)
 - Zero memory allocations at runtime
 - Support for CPU-only inference
- [Partial GPU support for NVIDIA via cuBLAS](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
+- [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
 - [Partial OpenCL GPU support via CLBlast](https://github.com/ggerganov/whisper.cpp#opencl-gpu-support-via-clblast)
 - [BLAS CPU support via OpenBLAS](https://github.com/ggerganov/whisper.cpp#blas-cpu-support-via-openblas)
 - [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
@ -35,11 +33,10 @@ Supported platforms:
 - [x] [WebAssembly](examples/whisper.wasm)
 - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
 - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
 - [x] [docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
-The entire implementation of the model is contained in 2 source files:
+The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
-
+The rest of the code is part of the [ggml](https://github.com/ggerganov/ggml) machine learning library.
 - Tensor operations: [ggml.h](ggml.h) / [ggml.c](ggml.c)
 - Transformer inference: [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
 Having such a lightweight implementation of the model makes it easy to integrate it into different platforms and applications.
 As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
@ -114,8 +111,8 @@ options:
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
-  -bo N,     --best-of N         [2      ] number of best candidates to keep
+  -bo N,     --best-of N         [5      ] number of best candidates to keep
-  -bs N,     --beam-size N       [-1     ] beam size for beam search
+  -bs N,     --beam-size N       [5      ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
@ -132,6 +129,7 @@ options:
  -fp,       --font-path         [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
  -ocsv,     --output-csv        [false  ] output result in a CSV file
  -oj,       --output-json       [false  ] output result in a JSON file
  -ojf,      --output-json-full  [false  ] include more information in the JSON file
  -of FNAME, --output-file FNAME [       ] output file path (without file extension)
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
@ -143,7 +141,8 @@ options:
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -f FNAME,  --file FNAME        [       ] input WAV file path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
-  -ls,       --log-score         [false  ] log best decoder scores of token
+  -ls,       --log-score         [false  ] log best decoder scores of tokens
  -ng,       --no-gpu            [false  ] disable GPU
 bash ./models/download-ggml-model.sh base.en
@ -234,18 +233,19 @@ make small
 make medium.en
 make medium
 make large-v1
-make large
+make large-v2
 make large-v3
 ```
 ## Memory usage
-| Model  | Disk   | Mem     | SHA                                        |
+| Model  | Disk    | Mem      |
-| ---    | ---    | ---     | ---                                        |
+| ---    | ---     | ---      |
-| tiny   |  75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| tiny   |  75 MiB | ~273 MB |
-| base   | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| base   | 142 MiB | ~388 MB |
-| small  | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| small  | 466 MiB | ~852 MB |
-| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| medium | 1.5 GiB | ~2.1 GB |
-| large  | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| large  | 2.9 GiB | ~3.9 GB |
 ## Quantization
@ -402,9 +402,9 @@ This can result in significant speedup in encoder performance. Here are the inst
 For more information about the Core ML implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
-## NVIDIA GPU support via cuBLAS
+## NVIDIA GPU support
-With NVIDIA cards the Encoder processing can to a large extent be offloaded to the GPU through cuBLAS.
+With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
 First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
 Now build `whisper.cpp` with cuBLAS support:
@ -449,6 +449,36 @@ make clean
 WHISPER_OPENBLAS=1 make -j
 ```
 ## Docker
 ### Prerequisites
 * Docker must be installed and running on your system.
 * Create a folder to store big models & intermediate files (e.g. /whisper/models)
 ### Images
 We have two Docker images available for this project:
 1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
 ### Usage
 ```shell
 # download model and persist it in a local folder
 docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main "./models/download-ggml-model.sh base /models"
 # transcribe an audio file
 docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 ## Limitations
 - Inference only
@ -771,6 +801,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
 | [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
 | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
 | [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
 | [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
 | [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
 | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
@ -780,6 +811,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
 | [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
 | [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
 | [server](examples/server) | | HTTP transcription server with OAI-like API |
 ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
--- a/bindings/go/Makefile
+++ b/bindings/go/Makefile
@ -1,9 +1,26 @@
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
 ifndef UNAME_P
 UNAME_P := $(shell uname -p)
 endif
 ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif
 GGML_METAL_PATH_RESOURCES := $(abspath ../..)
 BUILD_DIR := build
 MODELS_DIR := models
 EXAMPLES_DIR := $(wildcard examples/*)
 INCLUDE_PATH := $(abspath ../..)
 LIBRARY_PATH := $(abspath ../..)
 ifeq ($(UNAME_S),Darwin)
 	EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
 endif
 all: clean whisper examples
 whisper: mkdir
@ -11,8 +28,13 @@ whisper: mkdir
 	@${MAKE} -C ../.. libwhisper.a
 test: model-small whisper modtidy
 ifeq ($(UNAME_S),Darwin)
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
 else
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
 endif
 examples: $(EXAMPLES_DIR)
@ -21,7 +43,11 @@ model-small: mkdir examples/go-model-download
 $(EXAMPLES_DIR): mkdir whisper modtidy
 	@echo Build example $(notdir $@)
 ifeq ($(UNAME_S),Darwin)
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
 else
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
 endif
 mkdir:
 	@echo Mkdir ${BUILD_DIR}
--- a/bindings/go/examples/go-model-download/main.go
+++ b/bindings/go/examples/go-model-download/main.go
@ -24,7 +24,7 @@ const (
 var (
 	// The models which will be downloaded, if no model is specified as an argument
-	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
+	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
 )
 var (
--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@ -83,7 +83,6 @@ const (
 	SampleRate = C.WHISPER_SAMPLE_RATE                 // Expected sample rate, samples per second
 	SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
 	NumFFT     = C.WHISPER_N_FFT
 	NumMEL     = C.WHISPER_N_MEL
 	HopLength  = C.WHISPER_HOP_LENGTH
 	ChunkSize  = C.WHISPER_CHUNK_SIZE
 )
@ -103,7 +102,7 @@ var (
 func Whisper_init(path string) *Context {
 	cPath := C.CString(path)
 	defer C.free(unsafe.Pointer(cPath))
-	if ctx := C.whisper_init_from_file(cPath); ctx != nil {
+	if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
 		return (*Context)(ctx)
 	} else {
 		return nil
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/java/build.gradle
+++ b/bindings/java/build.gradle
@ -9,6 +9,7 @@ archivesBaseName = 'whispercpp'
 group = 'io.github.ggerganov'
 version = '1.4.0'
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
@ -4,6 +4,7 @@ import com.sun.jna.Structure;
 import com.sun.jna.ptr.PointerByReference;
 import io.github.ggerganov.whispercpp.ggml.GgmlType;
 import io.github.ggerganov.whispercpp.WhisperModel;
 import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 import java.util.List;
@ -23,8 +24,9 @@ public class WhisperContext extends Structure {
    public PointerByReference vocab;
    public PointerByReference state;
-    /** populated by whisper_init_from_file() */
+    /** populated by whisper_init_from_file_with_params() */
    String path_model;
    WhisperContextParams params;
 //    public static class ByReference extends WhisperContext implements Structure.ByReference {
 //    }
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
@ -2,12 +2,16 @@ package io.github.ggerganov.whispercpp;
 import com.sun.jna.Native;
 import com.sun.jna.Pointer;
 import io.github.ggerganov.whispercpp.bean.WhisperSegment;
 import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 import io.github.ggerganov.whispercpp.params.WhisperFullParams;
 import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 /**
 * Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.
@ -15,8 +19,9 @@ import java.io.IOException;
 public class WhisperCpp implements AutoCloseable {
    private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
    private Pointer ctx = null;
-    private Pointer greedyPointer = null;
+    private Pointer paramsPointer = null;
-    private Pointer beamPointer = null;
+    private Pointer greedyParamsPointer = null;
    private Pointer beamParamsPointer = null;
    public File modelDir() {
        String modelDirPath = System.getenv("XDG_CACHE_HOME");
@ -31,6 +36,18 @@ public class WhisperCpp implements AutoCloseable {
     * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
     */
    public void initContext(String modelPath) throws FileNotFoundException {
        initContextImpl(modelPath, getContextDefaultParams());
    }
    /**
     * Initialises the context with caller-supplied context params by delegating to `initContextImpl()`.
     *
     * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
     * @param params - params to use when initialising the context
     * @throws FileNotFoundException propagated from `initContextImpl()`
     */
    public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
        initContextImpl(modelPath, params);
    }
    private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
        if (ctx != null) {
            lib.whisper_free(ctx);
        }
@ -43,13 +60,26 @@ public class WhisperCpp implements AutoCloseable {
            modelPath = new File(modelDir(), modelPath).getAbsolutePath();
        }
-        ctx = lib.whisper_init_from_file(modelPath);
+        ctx = lib.whisper_init_from_file_with_params(modelPath, params);
        if (ctx == null) {
            throw new FileNotFoundException(modelPath);
        }
    }
    /**
     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
     * The native params struct is allocated at most once and cached in `paramsPointer`, the same
     * pattern this class uses for `greedyParamsPointer` / `beamParamsPointer`. Previously every call
     * allocated a new struct and overwrote the field, leaking the earlier allocation.
     * The cached memory is released by `freeParams()`.
     * Note: the returned structure is backed by that cached native memory, so values written through
     * to it may be observed by later calls.
     *
     * @return context params read from the cached native struct
     */
    public WhisperContextParams getContextDefaultParams() {
        // allocate lazily; freeParams() resets the field to null so a later call allocates again
        if (paramsPointer == null) {
            paramsPointer = lib.whisper_context_default_params_by_ref();
        }
        WhisperContextParams params = new WhisperContextParams(paramsPointer);
        params.read();
        return params;
    }
    /**
     * Provides default params which can be used with `whisper_full()` etc.
     * Because this function allocates memory for the params, the caller must call either:
@ -63,15 +93,15 @@ public class WhisperCpp implements AutoCloseable {
        // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
        if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
-            if (greedyPointer == null) {
+            if (greedyParamsPointer == null) {
-                greedyPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
+                greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
            }
-            pointer = greedyPointer;
+            pointer = greedyParamsPointer;
        } else {
-            if (beamPointer == null) {
+            if (beamParamsPointer == null) {
-                beamPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
+                beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
            }
-            pointer = beamPointer;
+            pointer = beamParamsPointer;
        }
        WhisperFullParams params = new WhisperFullParams(pointer);
@ -93,13 +123,17 @@ public class WhisperCpp implements AutoCloseable {
    }
    private void freeParams() {
-        if (greedyPointer != null) {
+        if (paramsPointer != null) {
-            Native.free(Pointer.nativeValue(greedyPointer));
+            Native.free(Pointer.nativeValue(paramsPointer));
-            greedyPointer = null;
+            paramsPointer = null;
        }
-        if (beamPointer != null) {
+        if (greedyParamsPointer != null) {
-            Native.free(Pointer.nativeValue(beamPointer));
+            Native.free(Pointer.nativeValue(greedyParamsPointer));
-            beamPointer = null;
+            greedyParamsPointer = null;
        }
        if (beamParamsPointer != null) {
            Native.free(Pointer.nativeValue(beamParamsPointer));
            beamParamsPointer = null;
        }
    }
@ -129,6 +163,28 @@ public class WhisperCpp implements AutoCloseable {
        return str.toString().trim();
    }
    /**
     * Runs `whisper_full()` over the given samples and collects every resulting segment
     * together with its `t0` / `t1` values.
     *
     * @param whisperParams params passed to `whisper_full()`
     * @param audioData     audio samples; the whole array is processed
     * @return one `WhisperSegment` per segment reported by the context, in index order
     * @throws IllegalStateException if the context has not been initialised
     * @throws IOException           if `whisper_full()` returns a non-zero status
     */
    public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
        if (ctx == null) {
            throw new IllegalStateException("Model not initialised");
        }
        final int status = lib.whisper_full(ctx, whisperParams, audioData, audioData.length);
        if (status != 0) {
            throw new IOException("Failed to process audio");
        }
        final int segmentCount = lib.whisper_full_n_segments(ctx);
        final List<WhisperSegment> result = new ArrayList<>(segmentCount);
        int index = 0;
        while (index < segmentCount) {
            final long startTime = lib.whisper_full_get_segment_t0(ctx, index);
            final String segmentText = lib.whisper_full_get_segment_text(ctx, index);
            final long endTime = lib.whisper_full_get_segment_t1(ctx, index);
            result.add(new WhisperSegment(startTime, endTime, segmentText));
            index++;
        }
        return result;
    }
 //    public int getTextSegmentCount(Pointer ctx) {
 //        return lib.whisper_full_n_segments(ctx);
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
@ -5,6 +5,7 @@ import com.sun.jna.Native;
 import com.sun.jna.Pointer;
 import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
 import io.github.ggerganov.whispercpp.model.WhisperTokenData;
 import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 import io.github.ggerganov.whispercpp.params.WhisperFullParams;
 public interface WhisperCppJnaLibrary extends Library {
@ -13,13 +14,32 @@ public interface WhisperCppJnaLibrary extends Library {
    String whisper_print_system_info();
    /**
-     * Allocate (almost) all memory needed for the model by loading from a file.
+     * DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
     *
     * @param path_model Path to the model file
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_file(String path_model);
    /**
     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
     * Because this function allocates memory for the params, the caller must release it with either:
     * - `whisper_free_context_params(pointer)`, or
     * - `Native.free(Pointer.nativeValue(pointer));`
     */
    Pointer whisper_context_default_params_by_ref();
    void whisper_free_context_params(Pointer params);
    /**
     * Allocate (almost) all memory needed for the model by loading from a file.
     *
     * @param path_model Path to the model file
     * @param params     Pointer to whisper_context_params
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
    /**
     * Allocate (almost) all memory needed for the model by loading from a buffer.
     *
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java
@ -0,0 +1,47 @@
 package io.github.ggerganov.whispercpp.bean;
 /**
 * Created by litonglinux@qq.com on 10/21/2023_7:48 AM
 */
 public class WhisperSegment {
   /** Start value of the segment, as supplied by the caller. */
   private long start;
   /** End value of the segment, as supplied by the caller. */
   private long end;
   /** Text of the segment. */
   private String sentence;
   /** Creates an empty segment: start and end are 0, sentence is null. */
   public WhisperSegment() {
   }
   /**
    * Creates a fully populated segment.
    *
    * @param start    start value of the segment
    * @param end      end value of the segment
    * @param sentence text of the segment
    */
   public WhisperSegment(long start, long end, String sentence) {
     this.start = start;
     this.end = end;
     this.sentence = sentence;
   }
   /** @return the start value */
   public long getStart() {
     return this.start;
   }
   /** @param start the new start value */
   public void setStart(long start) {
     this.start = start;
   }
   /** @return the end value */
   public long getEnd() {
     return this.end;
   }
   /** @param end the new end value */
   public void setEnd(long end) {
     this.end = end;
   }
   /** @return the segment text */
   public String getSentence() {
     return this.sentence;
   }
   /** @param sentence the new segment text */
   public void setSentence(String sentence) {
     this.sentence = sentence;
   }
   /** Renders the segment as {@code [start --> end]:sentence}. */
   @Override
   public String toString() {
     StringBuilder out = new StringBuilder();
     out.append("[").append(start);
     out.append(" --> ").append(end);
     out.append("]:").append(sentence);
     return out.toString();
   }
 }
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java
@ -0,0 +1,31 @@
 package io.github.ggerganov.whispercpp.params;
 import com.sun.jna.*;
 import java.util.Arrays;
 import java.util.List;
 /**
 * Parameters for the whisper_init_from_file_with_params() function.
 * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
 * whisper_context_default_params()
 */
 public class WhisperContextParams extends Structure {
    /** Whether to use the GPU for inference (default = true). */
    public CBool use_gpu;
    /**
     * Wraps the native params struct located at the given pointer.
     *
     * @param p pointer passed on to the `Structure` constructor
     */
    public WhisperContextParams(Pointer p) {
        super(p);
    }
    /**
     * Switches GPU inference on or off.
     *
     * @param enable true stores `CBool.TRUE` in `use_gpu`, false stores `CBool.FALSE`
     */
    public void useGpu(boolean enable) {
        if (enable) {
            use_gpu = CBool.TRUE;
        } else {
            use_gpu = CBool.FALSE;
        }
    }
    /** Lists the structure's fields by name; `use_gpu` is the only one. */
    @Override
    protected List<String> getFieldOrder() {
        return Arrays.asList("use_gpu");
    }
 }
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java
@ -58,6 +58,9 @@ public class WhisperFullParams extends Structure {
        no_context = enable ? CBool.FALSE : CBool.TRUE;
    }
    /** Generate timestamps or not? */
    public CBool no_timestamps;
    /** Flag to force single segment output (useful for streaming). (default = false) */
    public CBool single_segment;
@ -304,10 +307,16 @@ public class WhisperFullParams extends Structure {
        logits_filter_callback = CallbackReference.getFunctionPointer(callback);
    }
    /** Grammar stuff */
    public Pointer grammar_rules;
    public long n_grammar_rules;
    public long i_start_rule;
    public float grammar_penalty;
    @Override
    protected List<String> getFieldOrder() {
        return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
-                "no_context", "single_segment",
+                "no_context", "single_segment", "no_timestamps",
                "print_special", "print_progress", "print_realtime", "print_timestamps",  "token_timestamps",
                "thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "speed_up", "audio_ctx",
                "tdrz_enable", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
@ -316,6 +325,7 @@ public class WhisperFullParams extends Structure {
                "new_segment_callback", "new_segment_callback_user_data",
                "progress_callback", "progress_callback_user_data",
                "encoder_begin_callback", "encoder_begin_callback_user_data",
-                "logits_filter_callback", "logits_filter_callback_user_data");
+                "logits_filter_callback", "logits_filter_callback_user_data",
                "grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
    }
 }
--- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
+++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp;
 import static org.junit.jupiter.api.Assertions.*;
 import io.github.ggerganov.whispercpp.bean.WhisperSegment;
 import io.github.ggerganov.whispercpp.params.CBool;
 import io.github.ggerganov.whispercpp.params.WhisperFullParams;
 import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
@ -11,6 +12,7 @@ import javax.sound.sampled.AudioInputStream;
 import javax.sound.sampled.AudioSystem;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.util.List;
 class WhisperCppTest {
    private static WhisperCpp whisper = new WhisperCpp();
@ -20,11 +22,12 @@ class WhisperCppTest {
    static void init() throws FileNotFoundException {
        // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
        // or you can provide the absolute path to the model file.
        //String modelName = "../../models/ggml-tiny.bin";
        String modelName = "../../models/ggml-tiny.en.bin";
        try {
            whisper.initContext(modelName);
-//            whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
+            //whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-//            whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
+            //whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
            modelInitialised = true;
        } catch (FileNotFoundException ex) {
            System.out.println("Model " + modelName + " not found");
@ -42,7 +45,7 @@ class WhisperCppTest {
        assertEquals(16384, params.n_max_text_ctx);
        assertFalse(params.translate);
        assertEquals(0.01f, params.thold_pt);
-        assertEquals(2, params.beam_search.beam_size);
+        assertEquals(5, params.beam_search.beam_size);
        assertEquals(-1.0f, params.beam_search.patience);
    }
@ -55,7 +58,7 @@ class WhisperCppTest {
        assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY.ordinal(), params.strategy);
        assertNotEquals(0, params.n_threads);
        assertEquals(16384, params.n_max_text_ctx);
-        assertEquals(2, params.greedy.best_of);
+        assertEquals(5, params.greedy.best_of);
    }
    @Test
@ -72,11 +75,11 @@ class WhisperCppTest {
        byte[] b = new byte[audioInputStream.available()];
        float[] floats = new float[b.length / 2];
-//        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
+        //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
        params.print_progress = CBool.FALSE;
-//        params.initial_prompt = "and so my fellow Americans um, like";
+        //params.initial_prompt = "and so my fellow Americans um, like";
        try {
@ -99,4 +102,43 @@ class WhisperCppTest {
            audioInputStream.close();
        }
    }
    /**
     * Transcribes samples/jfk.wav with the beam-search default params and checks that
     * `fullTranscribeWithTime()` returns at least one segment. Skipped when the model
     * could not be initialised.
     */
    @Test
    void testFullTranscribeWithTime() throws Exception {
        if (!modelInitialised) {
            System.out.println("Model not initialised, skipping test");
            return;
        }
        // Given
        File wavFile = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
        AudioInputStream wavStream = AudioSystem.getAudioInputStream(wavFile);
        byte[] pcmBytes = new byte[wavStream.available()];
        float[] samples = new float[pcmBytes.length / 2];
        // alternative: whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY)
        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
        params.print_progress = CBool.FALSE;
        try {
            wavStream.read(pcmBytes);
            // each byte pair is combined low byte first into one signed sample, then scaled by 32767
            int sampleIdx = 0;
            for (int byteIdx = 0; byteIdx < pcmBytes.length; byteIdx += 2) {
                int highBits = pcmBytes[byteIdx + 1] << 8;
                int lowBits = pcmBytes[byteIdx] & 0xFF;
                samples[sampleIdx] = (highBits | lowBits) / 32767.0f;
                sampleIdx++;
            }
            List<WhisperSegment> transcribed = whisper.fullTranscribeWithTime(params, samples);
            assertTrue(transcribed.size() > 0, "The size of segments should be greater than 0");
            transcribed.forEach(segment -> System.out.println(segment));
        } finally {
            wavStream.close();
        }
    }
 }
--- a/bindings/javascript/emscripten.cpp
+++ b/bindings/javascript/emscripten.cpp
@ -20,7 +20,7 @@ struct whisper_context * g_context;
 EMSCRIPTEN_BINDINGS(whisper) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        if (g_context == nullptr) {
-            g_context = whisper_init_from_file(path_model.c_str());
+            g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
            if (g_context != nullptr) {
                return true;
            } else {
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.4.2",
+  "version": "1.5.4",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/bindings/ruby/ext/ggml-backend-impl.h
+++ b/bindings/ruby/ext/ggml-backend-impl.h
@ -70,7 +70,7 @@ extern "C" {
        void                      (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
        // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
        // check if the backend supports an operation
        bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
--- a/bindings/ruby/ext/ggml-backend.c
+++ b/bindings/ruby/ext/ggml-backend.c
@ -156,8 +156,8 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
    backend->iface.graph_plan_compute(backend, plan);
 }
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+    return backend->iface.graph_compute(backend, cgraph);
 }
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
--- a/bindings/ruby/ext/ggml-backend.h
+++ b/bindings/ruby/ext/ggml-backend.h
@ -52,7 +52,7 @@ extern "C" {
    GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
    GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
    GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
    // tensor copy between different backends
--- a/bindings/ruby/ext/ruby_whisper.cpp
+++ b/bindings/ruby/ext/ruby_whisper.cpp
@ -87,7 +87,7 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
  if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
    rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
  }
-  rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
+  rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
  if (rw->context == nullptr) {
    rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
  }
--- a/coreml/whisper-encoder-impl.h
+++ b/coreml/whisper-encoder-impl.h
@ -123,7 +123,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
 /**
    Make a prediction using the convenience interface
-    @param logmel_data as 1 × 80 × 3000 3-dimensional array of floats:
+    @param logmel_data as 1 × n_mel × 3000 3-dimensional array of floats:
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
    @return the prediction as whisper_encoder_implOutput
 */
--- a/coreml/whisper-encoder.h
+++ b/coreml/whisper-encoder.h
@ -3,6 +3,8 @@
 // Code is derived from the work of Github user @wangchou
 // ref: https://github.com/wangchou/callCoreMLFromCpp
 #include <stdint.h>
 #if __cplusplus
 extern "C" {
 #endif
@ -14,6 +16,8 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx);
 void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                             int64_t   n_ctx,
                             int64_t   n_mel,
                               float * mel,
                               float * out);
--- a/coreml/whisper-encoder.mm
+++ b/coreml/whisper-encoder.mm
@ -24,9 +24,9 @@ struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
    // select which device to run the Core ML model on
    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    config.computeUnits = MLComputeUnitsCPUAndGPU;
+    // config.computeUnits = MLComputeUnitsCPUAndGPU;
    //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
-    //config.computeUnits = MLComputeUnitsAll;
+    config.computeUnits = MLComputeUnitsAll;
    const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
@ -48,13 +48,15 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx) {
 void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                             int64_t   n_ctx,
                             int64_t   n_mel,
                               float * mel,
                               float * out) {
    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
-                                           shape: @[@1, @80, @3000]
+                                           shape: @[@1, @(n_mel), @(n_ctx)]
                                        dataType: MLMultiArrayDataTypeFloat32
-                                         strides: @[@(240000), @(3000), @1]
+                                         strides: @[@(n_ctx*n_mel), @(n_ctx), @1]
                                     deallocator: nil
                                           error: nil
    ];
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -14,6 +14,10 @@ if (WHISPER_SDL2)
    message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
 endif()
 if (WHISPER_CLBLAST)
    find_package(CLBlast REQUIRED)
 endif()
 # common
 set(TARGET common)
@ -23,6 +27,7 @@ add_library(${TARGET} STATIC
    common.cpp
    common-ggml.h
    common-ggml.cpp
    grammar-parser.cpp
    )
 include(DefaultTargetOptions)
@ -64,6 +69,7 @@ elseif(CMAKE_JS_VERSION)
 else()
    add_subdirectory(main)
    add_subdirectory(stream)
    add_subdirectory(server)
    add_subdirectory(command)
    add_subdirectory(bench)
    add_subdirectory(quantize)
@ -71,3 +77,5 @@ else()
    add_subdirectory(talk-llama)
    add_subdirectory(lsp)
 endif()
 add_subdirectory(wchess)
--- a/examples/addon.node/test/whisper.spec.js
+++ b/examples/addon.node/test/whisper.spec.js
@ -11,6 +11,7 @@ const whisperParamsMock = {
  language: "en",
  model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
  fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
  use_gpu: true,
 };
 describe("Run whisper.node", () => {
--- a/examples/addon.node/addon.cpp
+++ b/examples/addon.node/addon.cpp
@ -36,6 +36,7 @@ struct whisper_params {
    bool print_colors   = false;
    bool print_progress = false;
    bool no_timestamps  = false;
    bool use_gpu        = true;
    std::string language = "en";
    std::string prompt;
@ -153,7 +154,9 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
    // whisper init
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams = whisper_context_default_params();
    // start from the library defaults so no field is left uninitialised; override only what this addon exposes
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    if (ctx == nullptr) {
        fprintf(stderr, "error: failed to initialize whisper context\n");
@ -315,10 +318,12 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
  std::string language = whisper_params.Get("language").As<Napi::String>();
  std::string model = whisper_params.Get("model").As<Napi::String>();
  std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
  bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
  params.language = language;
  params.model = model;
  params.fname_inp.emplace_back(input);
  params.use_gpu = use_gpu;
  Napi::Function callback = info[1].As<Napi::Function>();
  Worker* worker = new Worker(callback, params);
--- a/examples/addon.node/index.js
+++ b/examples/addon.node/index.js
@ -11,6 +11,7 @@ const whisperParams = {
  language: "en",
  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
  fname_inp: "../../samples/jfk.wav",
  use_gpu: true,
 };
 const arguments = process.argv.slice(2);
--- a/examples/bench.wasm/emscripten.cpp
+++ b/examples/bench.wasm/emscripten.cpp
@ -23,7 +23,9 @@ void bench_main(size_t index) {
    fprintf(stderr, "%s: running benchmark with %d threads - please wait...\n", __func__, n_threads);
-    if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
+    const int n_mels = whisper_model_n_mels(ctx);
    if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
        fprintf(stderr, "error: failed to set mel: %d\n", ret);
        return;
    }
@ -57,7 +59,7 @@ EMSCRIPTEN_BINDINGS(bench) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                if (g_contexts[i] != nullptr) {
                    if (g_worker.joinable()) {
                        g_worker.join();
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -11,6 +11,8 @@ struct whisper_params {
    int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat
    std::string model = "models/ggml-base.en.bin";
    bool use_gpu = true;
 };
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -26,6 +28,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-t"  || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
        else if (arg == "-m"  || arg == "--model")   { params.model     = argv[++i]; }
        else if (arg == "-w"  || arg == "--what")    { params.what      = atoi(argv[++i]); }
        else if (arg == "-ng" || arg == "--no-gpu")  { params.use_gpu   = false; }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -45,6 +48,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -t N,     --threads N   [%-7d] number of threads to use during computation\n", params.n_threads);
    fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n",                                  params.model.c_str());
    fprintf(stderr, "  -w N,     --what N      [%-7d] what to benchmark:\n",                          params.what);
    fprintf(stderr, "  -ng,      --no-gpu      [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "                           %-7s  0 - whisper\n",                                 "");
    fprintf(stderr, "                           %-7s  1 - memcpy\n",                                  "");
    fprintf(stderr, "                           %-7s  2 - ggml_mul_mat\n",                            "");
@ -54,7 +58,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
 int whisper_bench_full(const whisper_params & params) {
    // whisper init
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    {
        fprintf(stderr, "\n");
@ -66,13 +73,15 @@ int whisper_bench_full(const whisper_params & params) {
        return 2;
    }
-    if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
+    const int n_mels = whisper_model_n_mels(ctx);
    if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
        fprintf(stderr, "error: failed to set mel: %d\n", ret);
        return 3;
    }
    // heat encoder
    if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
-        fprintf(stderr, "error: failed to encode model: %d\n", ret);
+        fprintf(stderr, "error: failed to encode: %d\n", ret);
        return 4;
    }
@ -81,13 +90,13 @@ int whisper_bench_full(const whisper_params & params) {
    // prompt heat
    if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
-        fprintf(stderr, "error: failed to encode model: %d\n", ret);
+        fprintf(stderr, "error: failed to decode: %d\n", ret);
        return 4;
    }
    // text-generation heat
    if (int ret = whisper_decode(ctx, tokens, 1, 256, params.n_threads) != 0) {
-        fprintf(stderr, "error: failed to encode model: %d\n", ret);
+        fprintf(stderr, "error: failed to decode: %d\n", ret);
        return 4;
    }
@ -95,20 +104,30 @@ int whisper_bench_full(const whisper_params & params) {
    // actual run
    if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
-        fprintf(stderr, "error: failed to encode model: %d\n", ret);
+        fprintf(stderr, "error: failed to encode: %d\n", ret);
        return 4;
    }
-    for (int i = 0; i < 16; i++) {
+    // text-generation
        if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to encode model: %d\n", ret);
            return 4;
        }
    }
    for (int i = 0; i < 256; i++) {
        if (int ret = whisper_decode(ctx, tokens, 1, i, params.n_threads) != 0) {
-            fprintf(stderr, "error: failed to encode model: %d\n", ret);
+            fprintf(stderr, "error: failed to decode: %d\n", ret);
            return 4;
        }
    }
    // batched decoding
    for (int i = 0; i < 64; i++) {
        if (int ret = whisper_decode(ctx, tokens, 5, 0, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to decode: %d\n", ret);
            return 4;
        }
    }
    // prompt processing
    for (int i = 0; i < 16; i++) {
        if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to decode: %d\n", ret);
            return 4;
        }
    }
--- a/examples/command.wasm/emscripten.cpp
+++ b/examples/command.wasm/emscripten.cpp
@ -243,7 +243,7 @@ EMSCRIPTEN_BINDINGS(command) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                if (g_contexts[i] != nullptr) {
                    g_running = true;
                    if (g_worker.joinable()) {
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@ -9,6 +9,7 @@
 #include "common-sdl.h"
 #include "common.h"
 #include "whisper.h"
 #include "grammar-parser.h"
 #include <sstream>
 #include <cassert>
@ -21,6 +22,11 @@
 #include <vector>
 #include <map>
 // Reports whether `fname` can be opened for reading: an input stream is
 // constructed on the path and its good() state is returned.
 bool file_exists(const std::string & fname) {
    std::ifstream probe(fname.c_str());
    const bool opened_ok = probe.good();
    return opened_ok;
 }
 // command-line parameters
 struct whisper_params {
    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency());
@ -33,17 +39,24 @@ struct whisper_params {
    float vad_thold  = 0.6f;
    float freq_thold = 100.0f;
    float grammar_penalty = 100.0f;
    grammar_parser::parse_state grammar_parsed;
    bool speed_up      = false;
    bool translate     = false;
    bool print_special = false;
    bool print_energy  = false;
    bool no_timestamps = true;
    bool use_gpu       = true;
    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
    std::string fname_out;
    std::string commands;
    std::string prompt;
    std::string context;
    std::string grammar;
 };
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -68,11 +81,15 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
        else if (arg == "-ng"  || arg == "--no-gpu")        { params.use_gpu       = false; }
        else if (arg == "-l"   || arg == "--language")      { params.language      = argv[++i]; }
        else if (arg == "-m"   || arg == "--model")         { params.model         = argv[++i]; }
        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = argv[++i]; }
        else if (arg == "-cmd" || arg == "--commands")      { params.commands      = argv[++i]; }
        else if (arg == "-p"   || arg == "--prompt")        { params.prompt        = argv[++i]; }
        else if (arg == "-ctx" || arg == "--context")       { params.context       = argv[++i]; }
        else if (                 arg == "--grammar")       { params.grammar       = argv[++i]; }
        else if (                 arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -101,21 +118,36 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -tr,        --translate      [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,        --print-special  [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,        --print-energy   [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
    fprintf(stderr, "  -ng,        --no-gpu         [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "  -l LANG,    --language LANG  [%-7s] spoken language\n",                             params.language.c_str());
    fprintf(stderr, "  -m FNAME,   --model FNAME    [%-7s] model path\n",                                  params.model.c_str());
    fprintf(stderr, "  -f FNAME,   --file FNAME     [%-7s] text output file name\n",                       params.fname_out.c_str());
    fprintf(stderr, "  -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n",             params.commands.c_str());
    fprintf(stderr, "  -p,         --prompt         [%-7s] the required activation prompt\n",              params.prompt.c_str());
    fprintf(stderr, "  -ctx,       --context        [%-7s] sample text to help the transcription\n",       params.context.c_str());
    fprintf(stderr, "  --grammar GRAMMAR            [%-7s] GBNF grammar to guide decoding\n",              params.grammar.c_str());
    fprintf(stderr, "  --grammar-penalty N          [%-7.1f] scales down logits of nongrammar tokens\n",   params.grammar_penalty);
    fprintf(stderr, "\n");
 }
-std::string transcribe(whisper_context * ctx, const whisper_params & params, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
+std::string transcribe(
                 whisper_context * ctx,
            const whisper_params & params,
        const std::vector<float> & pcmf32,
               const std::string & grammar_rule,
                           float & logprob_min,
                           float & logprob_sum,
                             int & n_tokens,
                         int64_t & t_ms) {
    const auto t_start = std::chrono::high_resolution_clock::now();
-    prob = 0.0f;
+    logprob_min = 0.0f;
    logprob_sum = 0.0f;
    n_tokens    = 0;
    t_ms = 0;
-    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+    //whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
    wparams.print_progress   = false;
    wparams.print_special    = params.print_special;
@ -123,6 +155,7 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
    wparams.print_timestamps = !params.no_timestamps;
    wparams.translate        = params.translate;
    wparams.no_context       = true;
    wparams.no_timestamps    = params.no_timestamps;
    wparams.single_segment   = true;
    wparams.max_tokens       = params.max_tokens;
    wparams.language         = params.language.c_str();
@ -131,11 +164,32 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
    wparams.audio_ctx = params.audio_ctx;
    wparams.speed_up  = params.speed_up;
    wparams.temperature     = 0.4f;
    wparams.temperature_inc = 1.0f;
    wparams.greedy.best_of  = 5;
    wparams.beam_search.beam_size = 5;
    wparams.initial_prompt = params.context.data();
    const auto & grammar_parsed = params.grammar_parsed;
    auto grammar_rules = grammar_parsed.c_rules();
    if (!params.grammar_parsed.rules.empty() && !grammar_rule.empty()) {
        if (grammar_parsed.symbol_ids.find(grammar_rule) == grammar_parsed.symbol_ids.end()) {
            fprintf(stderr, "%s: warning: grammar rule '%s' not found - skipping grammar sampling\n", __func__, grammar_rule.c_str());
        } else {
            wparams.grammar_rules   = grammar_rules.data();
            wparams.n_grammar_rules = grammar_rules.size();
            wparams.i_start_rule    = grammar_parsed.symbol_ids.at(grammar_rule);
            wparams.grammar_penalty = params.grammar_penalty;
        }
    }
    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
        return "";
    }
    int prob_n = 0;
    std::string result;
    const int n_segments = whisper_full_n_segments(ctx);
@ -144,19 +198,17 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
        result += text;
-        const int n_tokens = whisper_full_n_tokens(ctx, i);
+        const int n = whisper_full_n_tokens(ctx, i);
-        for (int j = 0; j < n_tokens; ++j) {
+        for (int j = 0; j < n; ++j) {
            const auto token = whisper_full_get_token_data(ctx, i, j);
-            prob += token.p;
+            if(token.plog > 0.0f) exit(0);
-            ++prob_n;
+            logprob_min = std::min(logprob_min, token.plog);
            logprob_sum += token.plog;
            ++n_tokens;
        }
    }
    if (prob_n > 0) {
        prob /= prob_n;
    }
    const auto t_end = std::chrono::high_resolution_clock::now();
    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
@ -415,7 +467,9 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi
    bool is_running = true;
    bool ask_prompt = true;
-    float prob = 0.0f;
+    float logprob_min = 0.0f;
    float logprob_sum = 0.0f;
    int   n_tokens    = 0;
    std::vector<float> pcmf32_cur;
@ -453,7 +507,7 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi
                // detect the commands
                audio.get(params.command_ms, pcmf32_cur);
-                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
+                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, "", logprob_min, logprob_sum, n_tokens, t_ms));
                const auto words = get_words(txt);
@ -489,18 +543,27 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi
 // general-purpose mode
 // freely transcribe the voice into text
-int process_general_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
+int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
    bool is_running  = true;
    bool have_prompt = false;
    bool ask_prompt  = true;
-    float prob0 = 0.0f;
+    float logprob_min0 = 0.0f;
-    float prob  = 0.0f;
+    float logprob_min  = 0.0f;
    float logprob_sum0 = 0.0f;
    float logprob_sum  = 0.0f;
    int n_tokens0 = 0;
    int n_tokens  = 0;
    std::vector<float> pcmf32_cur;
    std::vector<float> pcmf32_prompt;
-    const std::string k_prompt = "Ok Whisper, start listening for commands.";
+    std::string k_prompt = "Ok Whisper, start listening for commands.";
    if (!params.prompt.empty()) {
        k_prompt = params.prompt;
    }
    fprintf(stderr, "\n");
    fprintf(stderr, "%s: general-purpose mode\n", __func__);
@ -533,9 +596,11 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
                    // wait for activation phrase
                    audio.get(params.prompt_ms, pcmf32_cur);
-                    const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob0, t_ms));
+                    const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, "prompt", logprob_min0, logprob_sum0, n_tokens0, t_ms));
-                    fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
+                    const float p = 100.0f * std::exp(logprob_min0);
                    fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms, p = %.2f%%)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms, p);
                    const float sim = similarity(txt, k_prompt);
@ -556,19 +621,30 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
                    // we have heard the activation phrase, now detect the commands
                    audio.get(params.command_ms, pcmf32_cur);
                    //printf("len prompt:  %.4f\n", pcmf32_prompt.size() / (float) WHISPER_SAMPLE_RATE);
                    //printf("len command: %.4f\n", pcmf32_cur.size() / (float) WHISPER_SAMPLE_RATE);
                    // prepend 3 seconds of silence
                    pcmf32_cur.insert(pcmf32_cur.begin(), 3.0f*WHISPER_SAMPLE_RATE, 0.0f);
                    // prepend the prompt audio
                    pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
-                    const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
+                    const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, "root", logprob_min, logprob_sum, n_tokens, t_ms));
-                    prob = 100.0f*(prob - prob0);
+                    //const float p = 100.0f * std::exp((logprob - logprob0) / (n_tokens - n_tokens0));
                    const float p = 100.0f * std::exp(logprob_min);
                    //fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
                    // find the prompt in the text
                    float best_sim = 0.0f;
                    size_t best_len = 0;
-                    for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
+                    for (size_t n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
                        if (n >= txt.size()) {
                            break;
                        }
                        const auto prompt = txt.substr(0, n);
                        const float sim = similarity(prompt, k_prompt);
@ -581,9 +657,16 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
                        }
                    }
                    fprintf(stdout, "%s:   DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
                    if (best_len == 0) {
                        fprintf(stdout, "%s: WARNING: command not recognized, try again\n", __func__);
                    } else {
                        // cut the prompt from the decoded text
                        const std::string command = ::trim(txt.substr(best_len));
                        fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                    }
                    fprintf(stdout, "\n");
                }
@ -610,7 +693,10 @@ int main(int argc, char ** argv) {
    // whisper init
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    // print some info about the processing
    {
@ -648,13 +734,37 @@ int main(int argc, char ** argv) {
    int  ret_val = 0;
    if (!params.grammar.empty()) {
        auto & grammar = params.grammar_parsed;
        if (file_exists(params.grammar.c_str())) {
            // read grammar from file
            std::ifstream ifs(params.grammar.c_str());
            const std::string txt = std::string((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
            grammar = grammar_parser::parse(txt.c_str());
        } else {
            // read grammar from string
            grammar = grammar_parser::parse(params.grammar.c_str());
        }
        // will be empty (default) if there are parse errors
        if (grammar.rules.empty()) {
            ret_val = 1;
        } else {
            fprintf(stderr, "%s: grammar:\n", __func__);
            grammar_parser::print_grammar(stderr, grammar);
            fprintf(stderr, "\n");
        }
    }
    if (ret_val == 0) {
        if (!params.commands.empty()) {
            ret_val = process_command_list(ctx, audio, params);
-    } else if (!params.prompt.empty()) {
+        } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
            ret_val = always_prompt_transcription(ctx, audio, params);
        } else {
            ret_val = process_general_transcription(ctx, audio, params);
        }
    }
    audio.pause();
--- a/examples/common-ggml.cpp
+++ b/examples/common-ggml.cpp
@ -9,6 +9,11 @@ static const std::map<std::string, enum ggml_ftype> GGML_FTYPE_MAP = {
    {"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
    {"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
    {"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
    {"q2_k", GGML_FTYPE_MOSTLY_Q2_K},
    {"q3_k", GGML_FTYPE_MOSTLY_Q3_K},
    {"q4_k", GGML_FTYPE_MOSTLY_Q4_K},
    {"q5_k", GGML_FTYPE_MOSTLY_Q5_K},
    {"q6_k", GGML_FTYPE_MOSTLY_Q6_K},
 };
 void ggml_print_ftypes(FILE * fp) {
@ -48,15 +53,15 @@ bool ggml_common_quantize_0(
        case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
        case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
        case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
        case GGML_FTYPE_MOSTLY_Q2_K: qtype = GGML_TYPE_Q2_K; break;
        case GGML_FTYPE_MOSTLY_Q3_K: qtype = GGML_TYPE_Q3_K; break;
        case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break;
        case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break;
        case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break;
        case GGML_FTYPE_UNKNOWN:
        case GGML_FTYPE_ALL_F32:
        case GGML_FTYPE_MOSTLY_F16:
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
        case GGML_FTYPE_MOSTLY_Q2_K:
        case GGML_FTYPE_MOSTLY_Q3_K:
        case GGML_FTYPE_MOSTLY_Q4_K:
        case GGML_FTYPE_MOSTLY_Q5_K:
        case GGML_FTYPE_MOSTLY_Q6_K:
                {
                    fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
                    return false;
@ -167,24 +172,17 @@ bool ggml_common_quantize_0(
            switch ((ggml_type) ttype) {
                case GGML_TYPE_Q4_0:
                    {
                        cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q4_1:
                    {
                        cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q5_0:
                    {
                        cur_size = ggml_quantize_q5_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q5_1:
                    {
                        cur_size = ggml_quantize_q5_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q8_0:
                case GGML_TYPE_Q2_K:
                case GGML_TYPE_Q3_K:
                case GGML_TYPE_Q4_K:
                case GGML_TYPE_Q5_K:
                case GGML_TYPE_Q6_K:
                    {
-                        cur_size = ggml_quantize_q8_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+                        cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements, hist_cur.data());
                    } break;
                case GGML_TYPE_F32:
                case GGML_TYPE_F16:
@ -192,11 +190,6 @@ bool ggml_common_quantize_0(
                case GGML_TYPE_I16:
                case GGML_TYPE_I32:
                case GGML_TYPE_Q8_1:
                case GGML_TYPE_Q2_K:
                case GGML_TYPE_Q3_K:
                case GGML_TYPE_Q4_K:
                case GGML_TYPE_Q5_K:
                case GGML_TYPE_Q6_K:
                case GGML_TYPE_Q8_K:
                case GGML_TYPE_COUNT:
                    {
--- a/examples/common-sdl.cpp
+++ b/examples/common-sdl.cpp
@ -139,10 +139,13 @@ void audio_async::callback(uint8_t * stream, int len) {
        return;
    }
-    const size_t n_samples = len / sizeof(float);
+    size_t n_samples = len / sizeof(float);
-    m_audio_new.resize(n_samples);
+    if (n_samples > m_audio.size()) {
-    memcpy(m_audio_new.data(), stream, n_samples * sizeof(float));
+        n_samples = m_audio.size();
        stream += (len - (n_samples * sizeof(float)));
    }
    //fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);
@ -153,7 +156,7 @@ void audio_async::callback(uint8_t * stream, int len) {
            const size_t n0 = m_audio.size() - m_audio_pos;
            memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
-            memcpy(&m_audio[0], &stream[n0], (n_samples - n0) * sizeof(float));
+            memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
            m_audio_len = m_audio.size();
--- a/examples/common-sdl.h
+++ b/examples/common-sdl.h
@ -41,7 +41,6 @@ private:
    std::mutex       m_mutex;
    std::vector<float> m_audio;
    std::vector<float> m_audio_new;
    size_t             m_audio_pos = 0;
    size_t             m_audio_len = 0;
 };
--- a/examples/common.h
+++ b/examples/common.h
@ -181,7 +181,7 @@ private:
    // It is assumed that PCM data is normalized to a range from -1 to 1
    bool write_audio(const float * data, size_t length) {
        for (size_t i = 0; i < length; ++i) {
-            const auto intSample = static_cast<const int16_t>(data[i] * 32767);
+            const int16_t intSample = data[i] * 32767;
            file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
            dataSize += sizeof(int16_t);
        }
--- a/examples/grammar-parser.cpp
+++ b/examples/grammar-parser.cpp
@ -0,0 +1,423 @@
 #include "grammar-parser.h"
 #include <cstdint>
 #include <cwchar>
 #include <string>
 #include <utility>
 #include <stdexcept>
 #include <exception>
 namespace grammar_parser {
    // Decodes the UTF-8 sequence that starts at src into a single code point.
    // Returns the code point together with a pointer to the first byte after the sequence.
    // NOTE: the input is assumed to be valid utf8; the only check is that decoding stops at
    //       the NUL terminator instead of running past it
    // copied from whisper.cpp
    std::pair<uint32_t, const char *> decode_utf8(const char * src) {
        // sequence length, indexed by the high nibble of the lead byte
        static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
        const uint8_t lead = static_cast<uint8_t>(*src);
        const int     n    = lookup[lead >> 4];
        // keep only the payload bits of the lead byte
        const uint8_t mask = (1 << (8 - n)) - 1;
        uint32_t      cp   = lead & mask;
        const char *  cur  = src + 1;
        // fold in up to n - 1 continuation bytes, 6 payload bits each
        for (int i = 1; i < n && *cur; ++i, ++cur) {
            cp = (cp << 6) + (static_cast<uint8_t>(*cur) & 0x3F);
        }
        return std::make_pair(cp, cur);
    }
    // Looks up the symbol named by src[0, len) in the symbol table.
    // A name that is not present yet is registered with the next free id (the current table size);
    // a name that is already present keeps the id it was given before.
    uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
        const uint32_t fresh_id = static_cast<uint32_t>(state.symbol_ids.size());
        // emplace leaves an existing entry untouched and hands back an iterator to it
        const auto inserted = state.symbol_ids.emplace(std::string(src, len), fresh_id);
        return inserted.first->second;
    }
    // Registers a synthesized symbol named "<base_name>_<id>" and returns its id,
    // where id is the size of the symbol table before the insertion.
    uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
        const uint32_t    id          = static_cast<uint32_t>(state.symbol_ids.size());
        const std::string synthesized = base_name + '_' + std::to_string(id);
        state.symbol_ids[synthesized] = id;
        return id;
    }
    // Stores rule as the definition of rule_id, growing the rule table first when
    // rule_id lies beyond its current end.
    void add_rule(
            parse_state & state,
            uint32_t      rule_id,
            const std::vector<whisper_grammar_element> & rule) {
        auto & table = state.rules;
        if (rule_id >= table.size()) {
            table.resize(rule_id + 1);
        }
        table[rule_id] = rule;
    }
    // True for the characters allowed in a name: ASCII letters, ASCII digits and '-'.
    bool is_word_char(char c) {
        if (c == '-') {
            return true;
        }
        const bool lower = c >= 'a' && c <= 'z';
        const bool upper = c >= 'A' && c <= 'Z';
        const bool digit = c >= '0' && c <= '9';
        return lower || upper || digit;
    }
    // Reads exactly `size` hexadecimal digits (either case) starting at src.
    // Returns the parsed value and a pointer just past the last digit.
    // Throws std::runtime_error when fewer than `size` hex digits are available.
    std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
        uint32_t acc = 0;
        int      i   = 0;
        for (; i < size; ++i) {
            const char c = src[i];
            uint32_t   digit;
            if (c >= '0' && c <= '9') {
                digit = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                digit = c - 'a' + 10;
            } else if (c >= 'A' && c <= 'F') {
                digit = c - 'A' + 10;
            } else {
                // not a hex digit (this includes the NUL terminator)
                break;
            }
            acc = (acc << 4) + digit;
        }
        if (i != size) {
            throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
        }
        return std::make_pair(acc, src + i);
    }
    // Skips spaces, tabs and '#' comments starting at src; CR/LF are skipped as well
    // when newline_ok is set. A comment runs up to, but not including, the end of its line.
    // Returns the first position that was not skipped.
    const char * parse_space(const char * src, bool newline_ok) {
        const char * cur = src;
        for (;;) {
            const char c = *cur;
            if (c == '#') {
                // swallow the comment, leaving the line terminator for the next iteration
                do {
                    cur++;
                } while (*cur && *cur != '\r' && *cur != '\n');
            } else if (c == ' ' || c == '\t') {
                cur++;
            } else if (newline_ok && (c == '\r' || c == '\n')) {
                cur++;
            } else {
                return cur;
            }
        }
    }
    // Scans the run of name characters (see is_word_char) that starts at src and
    // returns a pointer just past it. Throws std::runtime_error when the run is empty.
    const char * parse_name(const char * src) {
        const char * end = src;
        for (; is_word_char(*end); ++end) {
        }
        if (end != src) {
            return end;
        }
        throw std::runtime_error(std::string("expecting name at ") + src);
    }
    // Parses one (possibly escaped) character at src.
    // Handles \xHH, \uHHHH, \UHHHHHHHH, \t, \r, \n and the literal escapes \\ \" \[ \];
    // anything that is not an escape is decoded as UTF-8.
    // Returns the character value and a pointer just past it.
    // Throws std::runtime_error on an unknown escape or when src is at the end of the input.
    std::pair<uint32_t, const char *> parse_char(const char * src) {
        if (*src == '\0') {
            throw std::runtime_error("unexpected end of input");
        }
        if (*src != '\\') {
            return decode_utf8(src);
        }
        const char   esc  = src[1];
        const char * rest = src + 2;
        if (esc == 'x') { return parse_hex(rest, 2); }
        if (esc == 'u') { return parse_hex(rest, 4); }
        if (esc == 'U') { return parse_hex(rest, 8); }
        if (esc == 't') { return std::make_pair('\t', rest); }
        if (esc == 'r') { return std::make_pair('\r', rest); }
        if (esc == 'n') { return std::make_pair('\n', rest); }
        if (esc == '\\' || esc == '"' || esc == '[' || esc == ']') {
            // the escaped character stands for itself
            return std::make_pair(esc, rest);
        }
        throw std::runtime_error(std::string("unknown escape at ") + src);
    }
    const char * parse_alternates(
            parse_state       & state,
            const char        * src,
            const std::string & rule_name,
            uint32_t            rule_id,
            bool                is_nested);
    // Parses one sequence of items (literals, char ranges, rule references, groups, each
    // optionally followed by a repetition operator) starting at src and appends the resulting
    // elements to out_elements.
    //   state        - symbol table and rule storage; synthesized sub-rules are added to it
    //   rule_name    - name of the enclosing rule, used as the base name of synthesized rules
    //   out_elements - receives the elements of this sequence (existing content is kept)
    //   is_nested    - passed to parse_space as newline_ok after each item
    // Returns the position of the first character that does not start an item.
    // Throws std::runtime_error on malformed input.
    const char * parse_sequence(
            parse_state                        & state,
            const char                         * src,
            const std::string                  & rule_name,
            std::vector<whisper_grammar_element> & out_elements,
            bool                                 is_nested) {
        // index in out_elements where the most recently parsed item begins;
        // a repetition operator applies to [last_sym_start, end)
        size_t last_sym_start = out_elements.size();
        const char * pos = src;
        while (*pos) {
            if (*pos == '"') { // literal string
                pos++;
                last_sym_start = out_elements.size();
                // parse_char throws at end of input, so an unterminated literal cannot loop forever
                while (*pos != '"') {
                    auto char_pair = parse_char(pos);
                         pos       = char_pair.second;
                    out_elements.push_back({WHISPER_GRETYPE_CHAR, char_pair.first});
                }
                pos = parse_space(pos + 1, is_nested);
            } else if (*pos == '[') { // char range(s)
                pos++;
                enum whisper_gretype start_type = WHISPER_GRETYPE_CHAR;
                if (*pos == '^') {
                    pos++;
                    start_type = WHISPER_GRETYPE_CHAR_NOT;
                }
                last_sym_start = out_elements.size();
                while (*pos != ']') {
                    auto char_pair = parse_char(pos);
                         pos       = char_pair.second;
                    // the first char of the set carries start_type, every later one is an alternative
                    enum whisper_gretype type = last_sym_start < out_elements.size()
                        ? WHISPER_GRETYPE_CHAR_ALT
                        : start_type;
                    out_elements.push_back({type, char_pair.first});
                    // "a-z" style range; a '-' directly before ']' is an ordinary character
                    if (pos[0] == '-' && pos[1] != ']') {
                        auto endchar_pair = parse_char(pos + 1);
                             pos          = endchar_pair.second;
                        out_elements.push_back({WHISPER_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
                    }
                }
                pos = parse_space(pos + 1, is_nested);
            } else if (is_word_char(*pos)) { // rule reference
                const char * name_end    = parse_name(pos);
                uint32_t     ref_rule_id = get_symbol_id(state, pos, name_end - pos);
                pos = parse_space(name_end, is_nested);
                last_sym_start = out_elements.size();
                out_elements.push_back({WHISPER_GRETYPE_RULE_REF, ref_rule_id});
            } else if (*pos == '(') { // grouping
                // parse nested alternates into synthesized rule
                pos = parse_space(pos + 1, true);
                uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
                pos = parse_alternates(state, pos, rule_name, sub_rule_id, true);
                last_sym_start = out_elements.size();
                // output reference to synthesized rule
                out_elements.push_back({WHISPER_GRETYPE_RULE_REF, sub_rule_id});
                if (*pos != ')') {
                    throw std::runtime_error(std::string("expecting ')' at ") + pos);
                }
                pos = parse_space(pos + 1, is_nested);
            } else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator
                // nothing was parsed since the start of the sequence, so there is no item to repeat
                if (last_sym_start == out_elements.size()) {
                    throw std::runtime_error(std::string("expecting preceeding item to */+/? at ") + pos);
                }
                // apply transformation to previous symbol (last_sym_start to end) according to
                // rewrite rules:
                // S* --> S' ::= S S' |
                // S+ --> S' ::= S S' | S
                // S? --> S' ::= S |
                uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
                std::vector<whisper_grammar_element> sub_rule;
                // add preceding symbol to generated rule
                sub_rule.insert(
                    sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end());
                if (*pos == '*' || *pos == '+') {
                    // cause generated rule to recurse
                    sub_rule.push_back({WHISPER_GRETYPE_RULE_REF, sub_rule_id});
                }
                // mark start of alternate def
                sub_rule.push_back({WHISPER_GRETYPE_ALT, 0});
                if (*pos == '+') {
                    // add preceding symbol as alternate only for '+' (otherwise empty)
                    sub_rule.insert(
                        sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end());
                }
                sub_rule.push_back({WHISPER_GRETYPE_END, 0});
                add_rule(state, sub_rule_id, sub_rule);
                // in original rule, replace previous symbol with reference to generated rule
                out_elements.resize(last_sym_start);
                out_elements.push_back({WHISPER_GRETYPE_RULE_REF, sub_rule_id});
                pos = parse_space(pos + 1, is_nested);
            } else {
                // not the start of an item (e.g. '|', ')' or a newline): the sequence ends here
                break;
            }
        }
        return pos;
    }
    // Parses one or more '|'-separated sequences starting at src and stores the result,
    // terminated by WHISPER_GRETYPE_END, as the definition of rule_id.
    // Newlines are always accepted right after a '|'; is_nested is forwarded to parse_sequence.
    // Returns the position where parsing stopped.
    const char * parse_alternates(
            parse_state       & state,
            const char        * src,
            const std::string & rule_name,
            uint32_t            rule_id,
            bool                is_nested) {
        std::vector<whisper_grammar_element> elems;
        const char * cur = src;
        for (;;) {
            cur = parse_sequence(state, cur, rule_name, elems, is_nested);
            if (*cur != '|') {
                break;
            }
            // another alternative follows
            elems.push_back({WHISPER_GRETYPE_ALT, 0});
            cur = parse_space(cur + 1, true);
        }
        elems.push_back({WHISPER_GRETYPE_END, 0});
        add_rule(state, rule_id, elems);
        return cur;
    }
    // Parses a single "name ::= alternates" definition starting at src and records it in state.
    // The definition must be followed by a line break or the end of the input.
    // Returns the position of the next non-blank content after the rule.
    // Throws std::runtime_error on malformed input.
    const char * parse_rule(parse_state & state, const char * src) {
        const char * const name_end = parse_name(src);
        const std::string  name(src, name_end);
        const uint32_t     rule_id  = get_symbol_id(state, src, name.size());

        const char * cur = parse_space(name_end, false);
        const bool has_assign = cur[0] == ':' && cur[1] == ':' && cur[2] == '=';
        if (!has_assign) {
            throw std::runtime_error(std::string("expecting ::= at ") + cur);
        }
        cur = parse_space(cur + 3, true);
        cur = parse_alternates(state, cur, name, rule_id, false);

        // consume exactly one line terminator (CR, LF or CRLF) if present
        switch (*cur) {
            case '\r':
                cur += (cur[1] == '\n') ? 2 : 1;
                break;
            case '\n':
                cur += 1;
                break;
            case '\0':
                break;
            default:
                throw std::runtime_error(std::string("expecting newline or end at ") + cur);
        }
        return parse_space(cur, true);
    }
    // Parses the whole grammar text in src, one rule after another.
    // On any parse error the message is printed to stderr and an empty parse_state is returned.
    parse_state parse(const char * src) {
        try {
            parse_state result;
            const char * cur = parse_space(src, true);
            for (; *cur != '\0'; cur = parse_rule(result, cur)) {
            }
            return result;
        } catch (const std::exception & err) {
            fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
            return parse_state();
        }
    }
    // Writes the code point c to file: printable ASCII (0x20..0x7E) is written as-is,
    // everything else in the escaped form <U+XXXX>.
    // DEL (0x7F) is a control character, so it takes the escaped form instead of being
    // written raw into the output.
    void print_grammar_char(FILE * file, uint32_t c) {
        const bool printable_ascii = 0x20 <= c && c < 0x7f;
        if (printable_ascii) {
            fprintf(file, "%c", static_cast<char>(c));
        } else {
            // cop out of encoding UTF-8
            fprintf(file, "<U+%04X>", c);
        }
    }
    // True when elem is one of the character element kinds
    // (CHAR, CHAR_NOT, CHAR_ALT or CHAR_RNG_UPPER).
    bool is_char_element(whisper_grammar_element elem) {
        const auto kind = elem.type;
        return kind == WHISPER_GRETYPE_CHAR
            || kind == WHISPER_GRETYPE_CHAR_NOT
            || kind == WHISPER_GRETYPE_CHAR_ALT
            || kind == WHISPER_GRETYPE_CHAR_RNG_UPPER;
    }
    void print_rule_binary(FILE * file, const std::vector<whisper_grammar_element> & rule) {
        for (auto elem : rule) {
            switch (elem.type) {
                case WHISPER_GRETYPE_END:            fprintf(file, "END");            break;
                case WHISPER_GRETYPE_ALT:            fprintf(file, "ALT");            break;
                case WHISPER_GRETYPE_RULE_REF:       fprintf(file, "RULE_REF");       break;
                case WHISPER_GRETYPE_CHAR:           fprintf(file, "CHAR");           break;
                case WHISPER_GRETYPE_CHAR_NOT:       fprintf(file, "CHAR_NOT");       break;
                case WHISPER_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
                case WHISPER_GRETYPE_CHAR_ALT:       fprintf(file, "CHAR_ALT");       break;
            }
            switch (elem.type) {
                case WHISPER_GRETYPE_END:
                case WHISPER_GRETYPE_ALT:
                case WHISPER_GRETYPE_RULE_REF:
                    fprintf(file, "(%u) ", elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR:
                case WHISPER_GRETYPE_CHAR_NOT:
                case WHISPER_GRETYPE_CHAR_RNG_UPPER:
                case WHISPER_GRETYPE_CHAR_ALT:
                    fprintf(file, "(\"");
                    print_grammar_char(file, elem.value);
                    fprintf(file, "\") ");
                    break;
            }
        }
        fprintf(file, "\n");
    }
    // Prints one rule to file in textual form: "<name> ::= <elements>" followed by a newline.
    //   rule_id         - id of the rule, used to look up its name and in error messages
    //   rule            - the rule's elements; must be non-empty and end with WHISPER_GRETYPE_END
    //   symbol_id_names - maps rule ids to names; looked up with at(), which throws
    //                     std::out_of_range for an id that is not present
    // Throws std::runtime_error when the rule is malformed (missing END terminator, END before
    // the last element, or CHAR_RNG_UPPER / CHAR_ALT without a preceding char element).
    void print_rule(
            FILE     * file,
            uint32_t   rule_id,
            const std::vector<whisper_grammar_element> & rule,
            const std::map<uint32_t, std::string>    & symbol_id_names) {
        if (rule.empty() || rule.back().type != WHISPER_GRETYPE_END) {
            throw std::runtime_error(
                "malformed rule, does not end with WHISPER_GRETYPE_END: " + std::to_string(rule_id));
        }
        fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
        // the trailing END element is excluded from the loop, so rule[i + 1] below is always valid
        for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
            whisper_grammar_element elem = rule[i];
            switch (elem.type) {
                case WHISPER_GRETYPE_END:
                    throw std::runtime_error(
                        "unexpected end of rule: " + std::to_string(rule_id) + "," +
                        std::to_string(i));
                case WHISPER_GRETYPE_ALT:
                    fprintf(file, "| ");
                    break;
                case WHISPER_GRETYPE_RULE_REF:
                    fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
                    break;
                case WHISPER_GRETYPE_CHAR:
                    // opens a character set; it is closed after the last char element of the run
                    fprintf(file, "[");
                    print_grammar_char(file, elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR_NOT:
                    fprintf(file, "[^");
                    print_grammar_char(file, elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR_RNG_UPPER:
                    if (i == 0 || !is_char_element(rule[i - 1])) {
                        throw std::runtime_error(
                            "WHISPER_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
                            std::to_string(rule_id) + "," + std::to_string(i));
                    }
                    fprintf(file, "-");
                    print_grammar_char(file, elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR_ALT:
                    if (i == 0 || !is_char_element(rule[i - 1])) {
                        throw std::runtime_error(
                            "WHISPER_GRETYPE_CHAR_ALT without preceding char: " +
                            std::to_string(rule_id) + "," + std::to_string(i));
                    }
                    print_grammar_char(file, elem.value);
                    break;
            }
            // close the character set unless the next element continues it
            if (is_char_element(elem)) {
                switch (rule[i + 1].type) {
                    case WHISPER_GRETYPE_CHAR_ALT:
                    case WHISPER_GRETYPE_CHAR_RNG_UPPER:
                        break;
                    default:
                        fprintf(file, "] ");
                }
            }
        }
        fprintf(file, "\n");
    }
    // Prints every rule of state to file, one per line, in rule-id order.
    // Any exception raised while printing is reported on stderr and ends the output early.
    void print_grammar(FILE * file, const parse_state & state) {
        try {
            // invert the name -> id table so rules can be printed by name
            std::map<uint32_t, std::string> names_by_id;
            for (const auto & entry : state.symbol_ids) {
                names_by_id[entry.second] = entry.first;
            }
            uint32_t rule_id = 0;
            for (const auto & rule : state.rules) {
                // for a raw dump, print_rule_binary(file, rule) can be called here instead
                print_rule(file, rule_id, rule, names_by_id);
                ++rule_id;
            }
        } catch (const std::exception & err) {
            fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
        }
    }
    std::vector<const whisper_grammar_element *> parse_state::c_rules() const{
        std::vector<const whisper_grammar_element *> ret;
        for (const auto & rule : rules) {
            ret.push_back(rule.data());
        }
        return ret;
    }
 }
--- a/examples/grammar-parser.h
+++ b/examples/grammar-parser.h
@ -0,0 +1,29 @@
 // Implements a parser for an extended Backus-Naur form (BNF), producing the
 // binary context-free grammar format specified by whisper.h. Supports character
 // ranges, grouping, and repetition operators. As an example, a grammar for
 // arithmetic might look like:
 //
 // root  ::= expr
 // expr  ::= term ([-+*/] term)*
 // term  ::= num | "(" space expr ")" space
 // num   ::= [0-9]+ space
 // space ::= [ \t\n]*
 #pragma once
 #include "whisper.h"
 #include <vector>
 #include <map>
 #include <cstdint>
 #include <string>
 namespace grammar_parser {
    // Result of parsing a grammar: the symbol table plus one element list per rule.
    struct parse_state {
        // maps a rule name to its numeric id
        std::map<std::string, uint32_t>                   symbol_ids;
        // rule definitions, indexed by rule id
        std::vector<std::vector<whisper_grammar_element>> rules;
        // returns one pointer per entry of `rules`, pointing at that rule's element array;
        // the pointers refer into `rules` and are only usable while it is left unmodified
        std::vector<const whisper_grammar_element *>      c_rules() const;
    };
    // Parses the grammar text in src. On a parse error the message is printed to stderr
    // and an empty parse_state is returned.
    parse_state parse(const char * src);
    // Prints the rules of state to file in textual form; errors are reported on stderr.
    void print_grammar(FILE * file, const parse_state & state);
 }
--- a/examples/helpers.js
+++ b/examples/helpers.js
@ -22,6 +22,7 @@ var printTextarea = (function() {
 // Ask the user for confirmation, then delete the IndexedDB database named by dbName
 // and reload the page. Does nothing when the user declines.
 async function clearCache() {
    const confirmed = confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.');
    if (!confirmed) {
        return;
    }
    indexedDB.deleteDatabase(dbName);
    location.reload();
 }
--- a/examples/livestream.sh
+++ b/examples/livestream.sh
@ -48,7 +48,7 @@ if [ -n "$3" ]; then
 fi
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 # list available models
 function list_models {
--- a/examples/lsp/lsp.cpp
+++ b/examples/lsp/lsp.cpp
@ -30,6 +30,7 @@ struct whisper_params {
    bool translate     = false;
    bool print_special = false;
    bool print_energy  = false;
    bool use_gpu       = true;
    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
@ -72,6 +73,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
        else if (arg == "-ng"  || arg == "--no-gpu")        { params.use_gpu       = false; }
        else if (arg == "-l"   || arg == "--language")      { params.language      = argv[++i]; }
        else if (arg == "-m"   || arg == "--model")         { params.model         = argv[++i]; }
        else {
@ -102,6 +104,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -tr,        --translate      [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,        --print-special  [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,        --print-energy   [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
    fprintf(stderr, "  -ng,        --no-gpu         [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "  -l LANG,    --language LANG  [%-7s] spoken language\n",                             params.language.c_str());
    fprintf(stderr, "  -m FNAME,   --model FNAME    [%-7s] model path\n",                                  params.model.c_str());
    fprintf(stderr, "\n");
@ -432,7 +435,9 @@ int main(int argc, char ** argv) {
    }
    // whisper init
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    // init audio
    audio_async audio(30*1000);
--- a/examples/main/README.md
+++ b/examples/main/README.md
@ -17,28 +17,37 @@ options:
  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
  -bo N,     --best-of N         [5      ] number of best candidates to keep
-  -bs N,     --beam-size N       [-1     ] beam size for beam search
+  -bs N,     --beam-size N       [5      ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
-  -su,       --speed-up          [false  ] speed up audio by x2 (reduced accuracy)
+  -debug,    --debug-mode        [false  ] enable debug mode (eg. dump log_mel)
  -tr,       --translate         [false  ] translate from source language to english
  -di,       --diarize           [false  ] stereo audio diarization
  -tdrz,     --tinydiarize       [false  ] enable tinydiarize (requires a tdrz model)
  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
  -otxt,     --output-txt        [false  ] output result in a text file
  -ovtt,     --output-vtt        [false  ] output result in a vtt file
  -osrt,     --output-srt        [false  ] output result in a srt file
  -olrc,     --output-lrc        [false  ] output result in a lrc file
  -owts,     --output-words      [false  ] output script for generating karaoke video
  -fp,       --font-path         [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
  -ocsv,     --output-csv        [false  ] output result in a CSV file
  -oj,       --output-json       [false  ] output result in a JSON file
  -ojf,      --output-json-full  [false  ] include more information in the JSON file
  -of FNAME, --output-file FNAME [       ] output file path (without file extension)
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
  -pp,       --print-progress    [false  ] print progress
-  -nt,       --no-timestamps     [true   ] do not print timestamps
+  -nt,       --no-timestamps     [false  ] do not print timestamps
  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
  -dl,       --detect-language   [false  ] exit after automatically detecting language
             --prompt PROMPT     [       ] initial prompt
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -f FNAME,  --file FNAME        [       ] input WAV file path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
  -ls,       --log-score         [false  ] log best decoder scores of tokens
  -ng,       --no-gpu            [false  ] disable GPU
 ```
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -62,8 +62,8 @@ struct whisper_params {
    int32_t progress_step =  5;
    int32_t max_context  = -1;
    int32_t max_len      =  0;
-    int32_t best_of      =  2;
+    int32_t best_of      = whisper_full_default_params(WHISPER_SAMPLING_GREEDY).greedy.best_of;
-    int32_t beam_size    = -1;
+    int32_t beam_size    = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH).beam_search.beam_size;
    float word_thold    =  0.01f;
    float entropy_thold =  2.40f;
@ -90,6 +90,7 @@ struct whisper_params {
    bool print_progress  = false;
    bool no_timestamps   = false;
    bool log_score       = false;
    bool use_gpu         = true;
    std::string language  = "en";
    std::string prompt;
@ -165,6 +166,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-f"    || arg == "--file")            { params.fname_inp.emplace_back(argv[++i]); }
        else if (arg == "-oved" || arg == "--ov-e-device")     { params.openvino_encode_device = argv[++i]; }
        else if (arg == "-ls"   || arg == "--log-score")       { params.log_score       = true; }
        else if (arg == "-ng"   || arg == "--no-gpu")          { params.use_gpu         = false; }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -221,6 +223,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
    fprintf(stderr, "  -ls,       --log-score         [%-7s] log best decoder scores of tokens\n",              params.log_score?"true":"false");
    fprintf(stderr, "  -ng,       --no-gpu            [%-7s] disable GPU\n",                                    params.use_gpu ? "false" : "true");
    fprintf(stderr, "\n");
 }
@ -877,7 +880,10 @@ int main(int argc, char ** argv) {
    // whisper init
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    if (ctx == nullptr) {
        fprintf(stderr, "error: failed to initialize whisper context\n");
@ -919,9 +925,9 @@ int main(int argc, char ** argv) {
            if (params.detect_language) {
                params.language = "auto";
            }
-            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
+            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, %d beams + best of %d, lang = %s, task = %s, %stimestamps = %d ...\n",
                    __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
-                    params.n_threads, params.n_processors,
+                    params.n_threads, params.n_processors, params.beam_size, params.best_of,
                    params.language.c_str(),
                    params.translate ? "translate" : "transcribe",
                    params.tinydiarize ? "tdrz = 1, " : "",
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@ -0,0 +1,12 @@
 # Name of the executable target defined by this file
 set(TARGET server)
 # The target is built from server.cpp; httplib.h and json.hpp are listed alongside it as sources
 add_executable(${TARGET} server.cpp httplib.h json.hpp)
 include(DefaultTargetOptions)
 # Link against the common and whisper targets plus the platform thread library
 target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
 # Check if the compiler is MinGW
 if(MINGW)
    # Link the necessary libraries for SSL and Winsock
    target_link_libraries(${TARGET} PRIVATE -lcrypt32 -lssl -lcrypto -lws2_32)
 endif()
--- a/examples/server/README.md
+++ b/examples/server/README.md
@ -0,0 +1,68 @@
 # whisper.cpp http server
 Simple HTTP server. WAV files are passed to the inference model via HTTP requests.
 https://github.com/ggerganov/whisper.cpp/assets/1991296/e983ee53-8741-4eb5-9048-afe5e4594b8f
 ## Usage
 ```
 ./server -h
 usage: ./bin/server [options]
 options:
  -h,        --help              [default] show this help message and exit
  -t N,      --threads N         [4      ] number of threads to use during computation
  -p N,      --processors N      [1      ] number of processors to use during computation
  -ot N,     --offset-t N        [0      ] time offset in milliseconds
  -on N,     --offset-n N        [0      ] segment index offset
  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
  -bo N,     --best-of N         [2      ] number of best candidates to keep
  -bs N,     --beam-size N       [-1     ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
  -debug,    --debug-mode        [false  ] enable debug mode (eg. dump log_mel)
  -tr,       --translate         [false  ] translate from source language to english
  -di,       --diarize           [false  ] stereo audio diarization
  -tdrz,     --tinydiarize       [false  ] enable tinydiarize (requires a tdrz model)
  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
  -pr,       --print-realtime    [false  ] print output in realtime
  -pp,       --print-progress    [false  ] print progress
  -nt,       --no-timestamps     [false  ] do not print timestamps
  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
  -dl,       --detect-language   [false  ] exit after automatically detecting language
             --prompt PROMPT     [       ] initial prompt
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
  --host HOST,                   [127.0.0.1] Hostname/IP address for the server
  --port PORT,                   [8080   ] Port number for the server
  --convert,                     [false  ] Convert audio to WAV, requires ffmpeg on the server
 ```
 > [!WARNING]  
 > **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
 ## request examples
 **/inference**
 ```
 curl 127.0.0.1:8080/inference \
 -H "Content-Type: multipart/form-data" \
 -F file="@<file-path>" \
 -F temperature="0.2" \
 -F response-format="json"
 ```
 **/load**
 ```
 curl 127.0.0.1:8080/load \
 -H "Content-Type: multipart/form-data" \
 -F model="<path-to-model-file>"
 ```
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
--- a/examples/server/json.hpp
+++ b/examples/server/json.hpp
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -0,0 +1,811 @@
 #include "common.h"
 #include "whisper.h"
 #include "httplib.h"
 #include "json.hpp"
 #include <cmath>
 #include <fstream>
 #include <cstdio>
 #include <string>
 #include <thread>
 #include <vector>
 #include <cstring>
 #include <sstream>
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 using namespace httplib;
 using json = nlohmann::json;
 namespace {
 // Terminal color map: 10 ANSI 256-color foreground escapes, one per range
 // starting at 0.0, 0.1, ..., 0.9.
 // The first entry is red, the middle entries are yellow, the last is green.
 const std::vector<std::string> k_colors{
    "\033[38;5;196m",
    "\033[38;5;202m",
    "\033[38;5;208m",
    "\033[38;5;214m",
    "\033[38;5;220m",
    "\033[38;5;226m",
    "\033[38;5;190m",
    "\033[38;5;154m",
    "\033[38;5;118m",
    "\033[38;5;82m",
 };
 // identifiers of the output formats (compared against whisper_params::response_format)
 const std::string json_format{"json"};
 const std::string text_format{"text"};
 const std::string srt_format{"srt"};
 const std::string vjson_format{"verbose_json"};
 const std::string vtt_format{"vtt"};
 // Settings of the HTTP server itself (as opposed to the whisper inference settings).
 struct server_params
 {
    // address and directory options, overridable with --host / --public
    std::string hostname{"127.0.0.1"};
    std::string public_path{"examples/server/public"};
    // listening port, overridable with --port
    int32_t port{8080};
    // values handed to the server's set_read_timeout / set_write_timeout (presumably seconds - TODO confirm)
    int32_t read_timeout{600};
    int32_t write_timeout{600};
    // set by --convert: run uploads through ffmpeg before reading them as WAV
    bool ffmpeg_converter{false};
 };
 // Inference settings: defaults here, overridden from the command line by
 // whisper_params_parse and, for a few fields, per request by get_req_parameters.
 struct whisper_params {
    // hardware_concurrency() may return 0 when the value is not computable,
    // so clamp the default to at least one thread
    int32_t n_threads    = std::max(1, std::min(4, (int32_t) std::thread::hardware_concurrency()));
    int32_t n_processors =  1;
    int32_t offset_t_ms  =  0;
    int32_t offset_n     =  0;
    int32_t duration_ms  =  0;
    int32_t progress_step =  5;
    int32_t max_context  = -1;
    int32_t max_len      =  0;
    int32_t best_of      =  2;
    int32_t beam_size    = -1;
    float word_thold    =  0.01f;
    float entropy_thold =  2.40f;
    float logprob_thold = -1.00f;
    // set from the request's "temperature" field
    float userdef_temp  =  0.20f;
    bool speed_up        = false;
    bool debug_mode      = false;
    bool translate       = false;
    bool detect_language = false;
    bool diarize         = false;
    bool tinydiarize     = false;
    bool split_on_word   = false;
    bool no_fallback     = false;
    bool print_special   = false;
    bool print_colors    = false;
    bool print_realtime  = false;
    bool print_progress  = false;
    bool no_timestamps   = false;
    bool use_gpu         = true;
    std::string language        = "en";
    std::string prompt          = "";
    std::string font_path       = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
    std::string model           = "models/ggml-base.en.bin";
    // one of the *_format constants; anything unrecognized is answered as JSON
    std::string response_format     = json_format;
    // [TDRZ] speaker turn string
    std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
    std::string openvino_encode_device = "CPU";
 };
 // Formats a time expressed in units of 10 ms as "HH:MM:SS.mmm".
 // With comma == true the millisecond separator is ',' instead of '.'.
 //  500 -> 00:00:05.000
 // 6000 -> 00:01:00.000
 std::string to_timestamp(int64_t t, bool comma = false) {
    const int64_t ms_per_sec  = 1000;
    const int64_t ms_per_min  = 60 * ms_per_sec;
    const int64_t ms_per_hour = 60 * ms_per_min;
    const int64_t total_ms = t * 10;
    // split the total into its hour / minute / second / millisecond fields
    const int hours   = (int) (total_ms / ms_per_hour);
    const int minutes = (int) ((total_ms % ms_per_hour) / ms_per_min);
    const int seconds = (int) ((total_ms % ms_per_min) / ms_per_sec);
    const int millis  = (int) (total_ms % ms_per_sec);
    const char * separator = comma ? "," : ".";
    char formatted[32];
    snprintf(formatted, sizeof(formatted), "%02d:%02d:%02d%s%03d", hours, minutes, seconds, separator, millis);
    return std::string(formatted);
 }
 // Converts a timestamp in units of 10 ms to a sample index at WHISPER_SAMPLE_RATE,
 // capped from above at n_samples - 1 and then from below at 0.
 int timestamp_to_sample(int64_t t, int n_samples) {
    const int last_index = (int) n_samples - 1;
    const int raw_index  = (int) ((t*WHISPER_SAMPLE_RATE)/100);
    const int capped     = std::min(last_index, raw_index);
    return std::max(0, capped);
 }
 // Returns true when fileName can be opened for reading.
 bool is_file_exist(const char *fileName)
 {
    return std::ifstream(fileName).good();
 }
 // Prints the command-line help to stderr. The bracketed column shows the current
 // value of each option as held in params / sparams.
 void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params,
                         const server_params& sparams) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options] \n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h,        --help              [default] show this help message and exit\n");
    fprintf(stderr, "  -t N,      --threads N         [%-7d] number of threads to use during computation\n",    params.n_threads);
    fprintf(stderr, "  -p N,      --processors N      [%-7d] number of processors to use during computation\n", params.n_processors);
    fprintf(stderr, "  -ot N,     --offset-t N        [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
    fprintf(stderr, "  -on N,     --offset-n N        [%-7d] segment index offset\n",                           params.offset_n);
    fprintf(stderr, "  -d  N,     --duration N        [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
    fprintf(stderr, "  -mc N,     --max-context N     [%-7d] maximum number of text context tokens to store\n", params.max_context);
    fprintf(stderr, "  -ml N,     --max-len N         [%-7d] maximum segment length in characters\n",           params.max_len);
    fprintf(stderr, "  -sow,      --split-on-word     [%-7s] split on word rather than on token\n",             params.split_on_word ? "true" : "false");
    fprintf(stderr, "  -bo N,     --best-of N         [%-7d] number of best candidates to keep\n",              params.best_of);
    fprintf(stderr, "  -bs N,     --beam-size N       [%-7d] beam size for beam search\n",                      params.beam_size);
    fprintf(stderr, "  -wt N,     --word-thold N      [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
    fprintf(stderr, "  -et N,     --entropy-thold N   [%-7.2f] entropy threshold for decoder fail\n",           params.entropy_thold);
    fprintf(stderr, "  -lpt N,    --logprob-thold N   [%-7.2f] log probability threshold for decoder fail\n",   params.logprob_thold);
    // fprintf(stderr, "  -su,       --speed-up          [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
    fprintf(stderr, "  -debug,    --debug-mode        [%-7s] enable debug mode (eg. dump log_mel)\n",           params.debug_mode ? "true" : "false");
    fprintf(stderr, "  -tr,       --translate         [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
    fprintf(stderr, "  -di,       --diarize           [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
    fprintf(stderr, "  -tdrz,     --tinydiarize       [%-7s] enable tinydiarize (requires a tdrz model)\n",     params.tinydiarize ? "true" : "false");
    fprintf(stderr, "  -nf,       --no-fallback       [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
    fprintf(stderr, "  -ps,       --print-special     [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
    fprintf(stderr, "  -pc,       --print-colors      [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
    fprintf(stderr, "  -pr,       --print-realtime    [%-7s] print output in realtime\n",                       params.print_realtime ? "true" : "false");
    fprintf(stderr, "  -pp,       --print-progress    [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
    fprintf(stderr, "  -nt,       --no-timestamps     [%-7s] do not print timestamps\n",                        params.no_timestamps ? "true" : "false");
    fprintf(stderr, "  -l LANG,   --language LANG     [%-7s] spoken language ('auto' for auto-detect)\n",       params.language.c_str());
    fprintf(stderr, "  -dl,       --detect-language   [%-7s] exit after automatically detecting language\n",    params.detect_language ? "true" : "false");
    fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
    fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
    // accepted by whisper_params_parse, so it has to be listed here as well;
    // the flag disables the GPU, hence the inverted display of use_gpu
    fprintf(stderr, "  -ng,       --no-gpu            [%-7s] disable GPU\n",                                    params.use_gpu ? "false" : "true");
    // server params
    fprintf(stderr, "  --host HOST,                   [%-7s] Hostname/ip-address for the server\n", sparams.hostname.c_str());
    fprintf(stderr, "  --port PORT,                   [%-7d] Port number for the server\n", sparams.port);
    fprintf(stderr, "  --public PATH,                 [%-7s] Path to the public folder\n", sparams.public_path.c_str());
    // no trailing newline on this line: the final fprintf below terminates it
    fprintf(stderr, "  --convert,                     [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
    fprintf(stderr, "\n");
 }
 // Parses the command line into params (inference) and sparams (server).
 // -h/--help and unknown arguments print the usage and exit(0); an option that
 // is missing its value prints an error plus the usage and exits with status 1.
 // Returns true once every argument has been consumed.
 // NOTE: a non-numeric value for a numeric option still makes std::stoi/std::stof throw.
 bool whisper_params_parse(int argc, char ** argv, whisper_params & params, server_params & sparams) {
    // Returns the value following option argv[i] and advances i past it.
    // Without this check a trailing option would read argv[argc] (a null pointer)
    // and build a std::string from it.
    const auto next_value = [&](int & i) -> char * {
        if (i + 1 >= argc) {
            fprintf(stderr, "error: missing value for argument: %s\n", argv[i]);
            whisper_print_usage(argc, argv, params, sparams);
            exit(1);
        }
        return argv[++i];
    };
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        if (arg == "-h" || arg == "--help") {
            whisper_print_usage(argc, argv, params, sparams);
            exit(0);
        }
        else if (arg == "-t"    || arg == "--threads")         { params.n_threads       = std::stoi(next_value(i)); }
        else if (arg == "-p"    || arg == "--processors")      { params.n_processors    = std::stoi(next_value(i)); }
        else if (arg == "-ot"   || arg == "--offset-t")        { params.offset_t_ms     = std::stoi(next_value(i)); }
        else if (arg == "-on"   || arg == "--offset-n")        { params.offset_n        = std::stoi(next_value(i)); }
        else if (arg == "-d"    || arg == "--duration")        { params.duration_ms     = std::stoi(next_value(i)); }
        else if (arg == "-mc"   || arg == "--max-context")     { params.max_context     = std::stoi(next_value(i)); }
        else if (arg == "-ml"   || arg == "--max-len")         { params.max_len         = std::stoi(next_value(i)); }
        else if (arg == "-bo"   || arg == "--best-of")         { params.best_of         = std::stoi(next_value(i)); }
        else if (arg == "-bs"   || arg == "--beam-size")       { params.beam_size       = std::stoi(next_value(i)); }
        else if (arg == "-wt"   || arg == "--word-thold")      { params.word_thold      = std::stof(next_value(i)); }
        else if (arg == "-et"   || arg == "--entropy-thold")   { params.entropy_thold   = std::stof(next_value(i)); }
        else if (arg == "-lpt"  || arg == "--logprob-thold")   { params.logprob_thold   = std::stof(next_value(i)); }
        // else if (arg == "-su"   || arg == "--speed-up")        { params.speed_up        = true; }
        else if (arg == "-debug"|| arg == "--debug-mode")      { params.debug_mode      = true; }
        else if (arg == "-tr"   || arg == "--translate")       { params.translate       = true; }
        else if (arg == "-di"   || arg == "--diarize")         { params.diarize         = true; }
        else if (arg == "-tdrz" || arg == "--tinydiarize")     { params.tinydiarize     = true; }
        else if (arg == "-sow"  || arg == "--split-on-word")   { params.split_on_word   = true; }
        else if (arg == "-nf"   || arg == "--no-fallback")     { params.no_fallback     = true; }
        else if (arg == "-fp"   || arg == "--font-path")       { params.font_path       = next_value(i); }
        else if (arg == "-ps"   || arg == "--print-special")   { params.print_special   = true; }
        else if (arg == "-pc"   || arg == "--print-colors")    { params.print_colors    = true; }
        else if (arg == "-pr"   || arg == "--print-realtime")  { params.print_realtime  = true; }
        else if (arg == "-pp"   || arg == "--print-progress")  { params.print_progress  = true; }
        else if (arg == "-nt"   || arg == "--no-timestamps")   { params.no_timestamps   = true; }
        else if (arg == "-l"    || arg == "--language")        { params.language        = next_value(i); }
        else if (arg == "-dl"   || arg == "--detect-language") { params.detect_language = true; }
        else if (                  arg == "--prompt")          { params.prompt          = next_value(i); }
        else if (arg == "-m"    || arg == "--model")           { params.model           = next_value(i); }
        else if (arg == "-oved" || arg == "--ov-e-device")     { params.openvino_encode_device = next_value(i); }
        else if (arg == "-ng"   || arg == "--no-gpu")          { params.use_gpu         = false; }
        // server params
        else if (                  arg == "--port")            { sparams.port        = std::stoi(next_value(i)); }
        else if (                  arg == "--host")            { sparams.hostname    = next_value(i); }
        else if (                  arg == "--public")          { sparams.public_path = next_value(i); }
        else if (                  arg == "--convert")         { sparams.ffmpeg_converter     = true; }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params, sparams);
            exit(0);
        }
    }
    return true;
 }
 // Context handed to the whisper print callbacks through their void * user_data argument.
 // Field order matters: main() fills it with positional aggregate initialization.
 struct whisper_print_user_data {
    // inference settings read by the callbacks
    const whisper_params * params;
    // per-channel PCM, passed to estimate_diarization_speaker when diarizing
    const std::vector<std::vector<float>> * pcmf32s;
    // last progress threshold reported; advanced by whisper_print_progress_callback
    int progress_prev;
 };
 // Runs "ffmpeg -version" through the shell; when the command does not report
 // success, prints an installation hint and terminates the process.
 void check_ffmpeg_availibility() {
    const bool ffmpeg_found = system("ffmpeg -version") == 0;
    if (!ffmpeg_found) {
        // ffmpeg is not available
        std::cout << "ffmpeg is not found. Please ensure that ffmpeg is installed "
                  << "and that its executable is included in your system's PATH. ";
        exit(0);
    }
    std::cout << "ffmpeg is available." << std::endl;
 }
 // Converts the audio file at temp_filename, in place, to 16 kHz mono 16-bit PCM
 // WAV by running ffmpeg through the shell.
 //   temp_filename - path of the file to convert; replaced by the converted file on success
 //   error_resp    - set to a JSON error message when the function returns false
 // Returns true on success.
 // NOTE(review): the file names are spliced into a shell command line. The visible
 // caller passes a fixed name; never pass a client-controlled name here.
 bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
    std::ostringstream cmd_stream;
    std::string converted_filename_temp = temp_filename + "_temp.wav";
    // -y: overwrite a stale output left by an earlier failed run instead of
    // prompting for confirmation on stdin
    cmd_stream << "ffmpeg -y -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
    std::string cmd = cmd_stream.str();
    int status = std::system(cmd.c_str());
    if (status != 0) {
        // best effort: do not leave a partial output behind
        remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
        return false;
    }
    // Remove the original file
    if (remove(temp_filename.c_str()) != 0) {
        error_resp = "{\"error\":\"Failed to remove the original file.\"}";
        return false;
    }
    // Rename the temporary file to match the original filename
    if (rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
        error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
        return false;
    }
    return true;
 }
 // Guesses which stereo channel is speaking between timestamps t0 and t1 (same
 // unit as timestamp_to_sample) by comparing the summed absolute amplitude of
 // channel 0 and channel 1 over that window.
 //   pcmf32s - per-channel samples; taken by const reference so the audio is not
 //             copied on every call
 //   id_only - when true return just "0", "1" or "?"; otherwise "(speaker X)"
 // A channel wins only when its energy exceeds the other's by more than 10%.
 // With fewer than two channels the speaker is reported as unknown ("?").
 std::string estimate_diarization_speaker(const std::vector<std::vector<float>> & pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
    std::string speaker = "?";
    if (pcmf32s.size() >= 2) {
        const std::vector<float> & ch0 = pcmf32s[0];
        const std::vector<float> & ch1 = pcmf32s[1];
        // bound the window by the shorter channel so neither is read out of range
        const int64_t n_samples = std::min(ch0.size(), ch1.size());
        const int64_t is0 = timestamp_to_sample(t0, n_samples);
        const int64_t is1 = timestamp_to_sample(t1, n_samples);
        double energy0 = 0.0;
        double energy1 = 0.0;
        for (int64_t j = is0; j < is1; j++) {
            energy0 += fabs(ch0[j]);
            energy1 += fabs(ch1[j]);
        }
        if (energy0 > 1.1*energy1) {
            speaker = "0";
        } else if (energy1 > 1.1*energy0) {
            speaker = "1";
        }
        //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, speaker = %s\n", is0, is1, energy0, energy1, speaker.c_str());
    }
    if (!id_only) {
        speaker.insert(0, "(speaker ");
        speaker.append(")");
    }
    return speaker;
 }
 void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
    int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
    int * progress_prev  = &(((whisper_print_user_data *) user_data)->progress_prev);
    if (progress >= *progress_prev + progress_step) {
        *progress_prev += progress_step;
        fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress);
    }
 }
 void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
    const auto & params  = *((whisper_print_user_data *) user_data)->params;
    const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
    const int n_segments = whisper_full_n_segments(ctx);
    std::string speaker = "";
    int64_t t0 = 0;
    int64_t t1 = 0;
    // print the last n_new segments
    const int s0 = n_segments - n_new;
    if (s0 == 0) {
        printf("\n");
    }
    for (int i = s0; i < n_segments; i++) {
        if (!params.no_timestamps || params.diarize) {
            t0 = whisper_full_get_segment_t0(ctx, i);
            t1 = whisper_full_get_segment_t1(ctx, i);
        }
        if (!params.no_timestamps) {
            printf("[%s --> %s]  ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
        }
        if (params.diarize && pcmf32s.size() == 2) {
            speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
        }
        if (params.print_colors) {
            for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
                if (params.print_special == false) {
                    const whisper_token id = whisper_full_get_token_id(ctx, i, j);
                    if (id >= whisper_token_eot(ctx)) {
                        continue;
                    }
                }
                const char * text = whisper_full_get_token_text(ctx, i, j);
                const float  p    = whisper_full_get_token_p   (ctx, i, j);
                const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
                printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
            }
        } else {
            const char * text = whisper_full_get_segment_text(ctx, i);
            printf("%s%s", speaker.c_str(), text);
        }
        if (params.tinydiarize) {
            if (whisper_full_get_segment_speaker_turn_next(ctx, i)) {
                printf("%s", params.tdrz_speaker_turn.c_str());
            }
        }
        // with timestamps or speakers: each segment on new line
        if (!params.no_timestamps || params.diarize) {
            printf("\n");
        }
        fflush(stdout);
    }
 }
 // Builds the plain-text transcript of ctx: one line per segment, each prefixed
 // with the estimated speaker when diarization is enabled and the audio has
 // exactly two channels.
 //   pcmf32s - per-channel samples; taken by const reference so the whole audio
 //             buffer is not copied for every response
 std::string output_str(struct whisper_context * ctx, const whisper_params & params, const std::vector<std::vector<float>> & pcmf32s) {
    std::stringstream result;
    const bool with_speaker = params.diarize && pcmf32s.size() == 2;
    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const char * text = whisper_full_get_segment_text(ctx, i);
        std::string speaker = "";
        if (with_speaker)
        {
            const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
            const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
            speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
        }
        result << speaker << text << "\n";
    }
    return result.str();
 }
 // Copies the optional multipart form fields of req into params; fields that
 // are absent leave the corresponding setting untouched.
 // Numeric fields go through std::stoi / std::stof, which throw on malformed input.
 void get_req_parameters(const Request & req, whisper_params & params)
 {
    // content of the named form field
    const auto field = [&req](const char * name) {
        return req.get_file_value(name).content;
    };
    // user model configuration
    if (req.has_file("offset-t")) {
        params.offset_t_ms = std::stoi(field("offset-t"));
    }
    if (req.has_file("offset-n")) {
        params.offset_n = std::stoi(field("offset-n"));
    }
    if (req.has_file("duration")) {
        params.duration_ms = std::stoi(field("duration"));
    }
    if (req.has_file("max-context")) {
        params.max_context = std::stoi(field("max-context"));
    }
    if (req.has_file("prompt")) {
        params.prompt = field("prompt");
    }
    if (req.has_file("response-format")) {
        params.response_format = field("response-format");
    }
    if (req.has_file("temperature")) {
        params.userdef_temp = std::stof(field("temperature"));
    }
 }
 }  // namespace
 int main(int argc, char ** argv) {
    whisper_params params;
    server_params sparams;
    std::mutex whisper_mutex;
    if (whisper_params_parse(argc, argv, params, sparams) == false) {
        whisper_print_usage(argc, argv, params, sparams);
        return 1;
    }
    if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
        whisper_print_usage(argc, argv, params, sparams);
        exit(0);
    }
    if (params.diarize && params.tinydiarize) {
        fprintf(stderr, "error: cannot use both --diarize and --tinydiarize\n");
        whisper_print_usage(argc, argv, params, sparams);
        exit(0);
    }
    if (sparams.ffmpeg_converter) {
        check_ffmpeg_availibility();
    }
    // whisper init
    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    if (ctx == nullptr) {
        fprintf(stderr, "error: failed to initialize whisper context\n");
        return 3;
    }
    // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
    whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
    Server svr;
    svr.set_default_headers({{"Server", "whisper.cpp"},
                             {"Access-Control-Allow-Origin", "*"},
                             {"Access-Control-Allow-Headers", "content-type"}});
    std::string const default_content = "<html>hello</html>";
    // this is only called if no index.html is found in the public --path
    svr.Get("/", [&default_content](const Request &, Response &res){
        res.set_content(default_content, "text/html");
        return false;
    });
    svr.Post("/inference", [&](const Request &req, Response &res){
        // acquire whisper model mutex lock
        whisper_mutex.lock();
        // first check user requested fields of the request
        if (!req.has_file("file"))
        {
            fprintf(stderr, "error: no 'file' field in the request\n");
            const std::string error_resp = "{\"error\":\"no 'file' field in the request\"}";
            res.set_content(error_resp, "application/json");
            whisper_mutex.unlock();
            return;
        }
        auto audio_file = req.get_file_value("file");
        // check non-required fields
        get_req_parameters(req, params);
        std::string filename{audio_file.filename};
        printf("Received request: %s\n", filename.c_str());
        // audio arrays
        std::vector<float> pcmf32;               // mono-channel F32 PCM
        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
        // write to temporary file
        const std::string temp_filename = "whisper_server_temp_file.wav";
        std::ofstream temp_file{temp_filename, std::ios::binary};
        temp_file << audio_file.content;
        temp_file.close();
        // if file is not wav, convert to wav
        if (sparams.ffmpeg_converter) {
            std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
            const bool is_converted = convert_to_wav(temp_filename, error_resp);
            if (!is_converted) {
                res.set_content(error_resp, "application/json");
                whisper_mutex.unlock();
                return;
            }
        }
        // read wav content into pcmf32
        if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
            fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
            const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
            res.set_content(error_resp, "application/json");
            std::remove(temp_filename.c_str());
            whisper_mutex.unlock();
            return;
        }
        // remove temp file
        std::remove(temp_filename.c_str());
        printf("Successfully loaded %s\n", filename.c_str());
        // print system information
        {
            fprintf(stderr, "\n");
            fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                    params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
        }
        // print some info about the processing
        {
            fprintf(stderr, "\n");
            if (!whisper_is_multilingual(ctx)) {
                if (params.language != "en" || params.translate) {
                    params.language = "en";
                    params.translate = false;
                    fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
                }
            }
            if (params.detect_language) {
                params.language = "auto";
            }
            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
                    __func__, filename.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
                    params.n_threads, params.n_processors,
                    params.language.c_str(),
                    params.translate ? "translate" : "transcribe",
                    params.tinydiarize ? "tdrz = 1, " : "",
                    params.no_timestamps ? 0 : 1);
            fprintf(stderr, "\n");
        }
        // run the inference
        {
            printf("Running whisper.cpp inference on %s\n", filename.c_str());
            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
            wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
            wparams.print_realtime   = false;
            wparams.print_progress   = params.print_progress;
            wparams.print_timestamps = !params.no_timestamps;
            wparams.print_special    = params.print_special;
            wparams.translate        = params.translate;
            wparams.language         = params.language.c_str();
            wparams.detect_language  = params.detect_language;
            wparams.n_threads        = params.n_threads;
            wparams.n_max_text_ctx   = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
            wparams.offset_ms        = params.offset_t_ms;
            wparams.duration_ms      = params.duration_ms;
            wparams.thold_pt         = params.word_thold;
            wparams.max_len          = params.max_len == 0 ? 60 : params.max_len;
            wparams.split_on_word    = params.split_on_word;
            wparams.speed_up         = params.speed_up;
            wparams.debug_mode       = params.debug_mode;
            wparams.tdrz_enable      = params.tinydiarize; // [TDRZ]
            wparams.initial_prompt   = params.prompt.c_str();
            wparams.greedy.best_of        = params.best_of;
            wparams.beam_search.beam_size = params.beam_size;
            wparams.temperature_inc  = params.userdef_temp;
            wparams.entropy_thold    = params.entropy_thold;
            wparams.logprob_thold    = params.logprob_thold;
            whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
            // this callback is called on each new segment
            if (params.print_realtime) {
                wparams.new_segment_callback           = whisper_print_segment_callback;
                wparams.new_segment_callback_user_data = &user_data;
            }
            if (wparams.print_progress) {
                wparams.progress_callback           = whisper_print_progress_callback;
                wparams.progress_callback_user_data = &user_data;
            }
            // examples for abort mechanism
            // in examples below, we do not abort the processing, but we could if the flag is set to true
            // the callback is called before every encoder run - if it returns false, the processing is aborted
            {
                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
                wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
                    bool is_aborted = *(bool*)user_data;
                    return !is_aborted;
                };
                wparams.encoder_begin_callback_user_data = &is_aborted;
            }
            // the callback is called before every computation - if it returns true, the computation is aborted
            {
                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
                wparams.abort_callback = [](void * user_data) {
                    bool is_aborted = *(bool*)user_data;
                    return is_aborted;
                };
                wparams.abort_callback_user_data = &is_aborted;
            }
            if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
                const std::string error_resp = "{\"error\":\"failed to process audio\"}";
                res.set_content(error_resp, "application/json");
                whisper_mutex.unlock();
                return;
            }
        }
        // return results to user
        if (params.response_format == text_format)
        {
            std::string results = output_str(ctx, params, pcmf32s);
            res.set_content(results.c_str(), "text/html");
        }
        else if (params.response_format == srt_format)
        {
            std::stringstream ss;
            const int n_segments = whisper_full_n_segments(ctx);
            for (int i = 0; i < n_segments; ++i) {
                const char * text = whisper_full_get_segment_text(ctx, i);
                const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
                const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
                std::string speaker = "";
                if (params.diarize && pcmf32s.size() == 2)
                {
                    speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
                }
                ss << i + 1 + params.offset_n << "\n";
                ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
                ss << speaker << text << "\n\n";
            }
            res.set_content(ss.str(), "application/x-subrip");
        } else if (params.response_format == vtt_format) {
            std::stringstream ss;
            ss << "WEBVTT\n\n";
            const int n_segments = whisper_full_n_segments(ctx);
            for (int i = 0; i < n_segments; ++i) {
                const char * text = whisper_full_get_segment_text(ctx, i);
                const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
                const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
                std::string speaker = "";
                if (params.diarize && pcmf32s.size() == 2)
                {
                    speaker = estimate_diarization_speaker(pcmf32s, t0, t1, true);
                    speaker.insert(0, "<v Speaker");
                    speaker.append(">");
                }
                ss << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
                ss << speaker << text << "\n\n";
            }
            res.set_content(ss.str(), "text/vtt");
        }
        // TODO add more output formats
        else
        {
            std::string results = output_str(ctx, params, pcmf32s);
            json jres = json{
                {"text", results}
            };
            res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace),
                            "application/json");
        }
        // return whisper model mutex lock
        whisper_mutex.unlock();
    });
    svr.Post("/load", [&](const Request &req, Response &res){
        whisper_mutex.lock();
        if (!req.has_file("model"))
        {
            fprintf(stderr, "error: no 'model' field in the request\n");
            const std::string error_resp = "{\"error\":\"no 'model' field in the request\"}";
            res.set_content(error_resp, "application/json");
            whisper_mutex.unlock();
            return;
        }
        std::string model = req.get_file_value("model").content;
        if (!is_file_exist(model.c_str()))
        {
            fprintf(stderr, "error: 'model': %s not found!\n", model.c_str());
            const std::string error_resp = "{\"error\":\"model not found!\"}";
            res.set_content(error_resp, "application/json");
            whisper_mutex.unlock();
            return;
        }
        // clean up
        whisper_free(ctx);
        // whisper init
        ctx = whisper_init_from_file_with_params(model.c_str(), cparams);
        // TODO perhaps load prior model here instead of exit
        if (ctx == nullptr) {
            fprintf(stderr, "error: model init  failed, no model loaded must exit\n");
            exit(1);
        }
        // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
        whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
        const std::string success = "Load was successful!";
        res.set_content(success, "application/text");
        // check if the model is in the file system
        whisper_mutex.unlock();
    });
    svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
        const char fmt[] = "500 Internal Server Error\n%s";
        char buf[BUFSIZ];
        try {
            std::rethrow_exception(std::move(ep));
        } catch (std::exception &e) {
            snprintf(buf, sizeof(buf), fmt, e.what());
        } catch (...) {
            snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
        }
        res.set_content(buf, "text/plain");
        res.status = 500;
    });
    svr.set_error_handler([](const Request &, Response &res) {
        if (res.status == 400) {
            res.set_content("Invalid request", "text/plain");
        } else if (res.status != 500) {
            res.set_content("File Not Found", "text/plain");
            res.status = 404;
        }
    });
    // set timeouts and change hostname and port
    svr.set_read_timeout(sparams.read_timeout);
    svr.set_write_timeout(sparams.write_timeout);
    if (!svr.bind_to_port(sparams.hostname, sparams.port))
    {
        fprintf(stderr, "\ncouldn't bind to server socket: hostname=%s port=%d\n\n",
                sparams.hostname.c_str(), sparams.port);
        return 1;
    }
    // Set the base directory for serving static files
    svr.set_base_dir(sparams.public_path);
    // to make it ctrl+clickable:
    printf("\nwhisper server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port);
    if (!svr.listen_after_bind())
    {
        return 1;
    }
    whisper_print_timings(ctx);
    whisper_free(ctx);
    return 0;
 }
--- a/examples/stream.wasm/emscripten.cpp
+++ b/examples/stream.wasm/emscripten.cpp
@ -132,7 +132,7 @@ EMSCRIPTEN_BINDINGS(stream) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                if (g_contexts[i] != nullptr) {
                    g_running = true;
                    if (g_worker.joinable()) {
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -48,11 +48,12 @@ struct whisper_params {
    bool no_context    = true;
    bool no_timestamps = false;
    bool tinydiarize   = false;
+    bool save_audio    = false; // save audio to wav file
+    bool use_gpu       = true;
    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
    std::string fname_out;
-    bool save_audio = false; // save audio to wav file
 };
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -84,6 +85,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-f"    || arg == "--file")          { params.fname_out     = argv[++i]; }
        else if (arg == "-tdrz" || arg == "--tinydiarize")   { params.tinydiarize   = true; }
        else if (arg == "-sa"   || arg == "--save-audio")    { params.save_audio    = true; }
        else if (arg == "-ng"   || arg == "--no-gpu")        { params.use_gpu       = false; }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@ -120,6 +122,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] text output file name\n",                          params.fname_out.c_str());
    fprintf(stderr, "  -tdrz,    --tinydiarize   [%-7s] enable tinydiarize (requires a tdrz model)\n",     params.tinydiarize ? "true" : "false");
    fprintf(stderr, "  -sa,      --save-audio    [%-7s] save the recorded audio to a file\n",              params.save_audio ? "true" : "false");
    fprintf(stderr, "  -ng,      --no-gpu        [%-7s] disable GPU inference\n",                          params.use_gpu ? "false" : "true");
    fprintf(stderr, "\n");
 }
@ -163,7 +166,10 @@ int main(int argc, char ** argv) {
        exit(0);
    }
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    std::vector<float> pcmf32    (n_samples_30s, 0.0f);
    std::vector<float> pcmf32_old;
--- a/examples/talk-llama/CMakeLists.txt
+++ b/examples/talk-llama/CMakeLists.txt
@ -1,23 +1,18 @@
 if (WHISPER_SDL2)
    # talk-llama
    set(TARGET talk-llama)
-    #add_executable(${TARGET} talk-llama.cpp llama.cpp)
+    add_executable(${TARGET} talk-llama.cpp llama.cpp)
-    #target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
-    #target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-    # TODO: this is temporary
+    if (WHISPER_CLBLAST)
-    #       need to export ggml symbols for MSVC, but too lazy ..
+        set(CLBLAST_LIBNAME clblast)
-    add_executable(${TARGET}
+    endif ()
-        talk-llama.cpp
+    target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CLBLAST_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
-        llama.cpp
-        ../common.cpp
-        ../common-sdl.cpp
-        ../../ggml.c
-        ../../ggml-alloc.c
-        ../../whisper.cpp)
-    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)
+    if(WIN32)
-    target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+        # It requires Windows 8.1 or later for PrefetchVirtualMemory
+        target_compile_definitions(${TARGET} PRIVATE -D_WIN32_WINNT=0x0602)
+    endif()
    include(DefaultTargetOptions)
 endif ()
--- a/examples/talk-llama/llama.cpp
+++ b/examples/talk-llama/llama.cpp
--- a/examples/talk-llama/llama.h
+++ b/examples/talk-llama/llama.h
@ -39,10 +39,11 @@
 #define LLAMA_MAX_RNG_STATE (64*1024)
 #define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 #define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
 #define LLAMA_SESSION_MAGIC   LLAMA_FILE_MAGIC_GGSN
-#define LLAMA_SESSION_VERSION 2
+#define LLAMA_SESSION_VERSION 3
 #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
 // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
@ -126,7 +127,7 @@ extern "C" {
        bool sorted;
    } llama_token_data_array;
-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);
    // Input data for llama_decode
    // A llama_batch object can contain input about one or many sequences
@ -158,16 +159,38 @@ extern "C" {
        llama_seq_id all_seq_id; // used if seq_id == NULL
    } llama_batch;
    // Value types that a model metadata override can carry; there is one
    // enumerator for each member of the value union in llama_model_kv_override.
    enum llama_model_kv_override_type {
        LLAMA_KV_OVERRIDE_INT,
        LLAMA_KV_OVERRIDE_FLOAT,
        LLAMA_KV_OVERRIDE_BOOL,
    };
    // One key/value override for the model metadata
    // (passed in through llama_model_params::kv_overrides).
    struct llama_model_kv_override {
        char key[128]; // metadata key, stored in a fixed 128-byte buffer
        enum llama_model_kv_override_type tag; // presumably selects which union member below is valid - TODO confirm
        union {
            int64_t int_value;
            double float_value;
            bool bool_value;
        };
    };
    struct llama_model_params {
        int32_t n_gpu_layers; // number of layers to store in VRAM
        int32_t main_gpu;     // the GPU that is used for scratch and small tensors
        const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
        // If the provided progress_callback returns true, model loading continues.
        // If it returns false, model loading is immediately aborted.
        llama_progress_callback progress_callback;
        // context pointer passed to the progress callback
        void * progress_callback_user_data;
        // override key-value pairs of the model meta data
        const struct llama_model_kv_override * kv_overrides;
        // Keep the booleans together to avoid misalignment during copy-by-value.
        bool vocab_only; // only load the vocabulary, no weights
        bool use_mmap;   // use mmap if possible
@ -185,17 +208,20 @@ extern "C" {
        // ref: https://github.com/ggerganov/llama.cpp/pull/2054
        float    rope_freq_base;   // RoPE base frequency, 0 = from model
        float    rope_freq_scale;  // RoPE frequency scaling factor, 0 = from model
-        float    yarn_ext_factor;  // YaRN extrapolation mix factor, NaN = from model
+        float    yarn_ext_factor;  // YaRN extrapolation mix factor, negative = from model
        float    yarn_attn_factor; // YaRN magnitude scaling factor
        float    yarn_beta_fast;   // YaRN low correction dim
        float    yarn_beta_slow;   // YaRN high correction dim
        uint32_t yarn_orig_ctx;    // YaRN original context size
        enum ggml_type type_k; // data type for K cache
        enum ggml_type type_v; // data type for V cache
        // Keep the booleans together to avoid misalignment during copy-by-value.
        bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-        bool f16_kv;     // use fp16 for KV cache, fp32 otherwise
+        bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
        bool embedding;   // embedding mode only
        bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
    };
    // model quantization parameters
@ -290,7 +316,9 @@ extern "C" {
    LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
-    LLAMA_API int llama_n_ctx      (const struct llama_context * ctx);
+    // TODO: become more consistent with returned int types across the API
    LLAMA_API uint32_t llama_n_ctx      (const struct llama_context * ctx);
    LLAMA_API uint32_t llama_n_batch    (const struct llama_context * ctx);
    LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
@ -301,6 +329,23 @@ extern "C" {
    // Get the model's RoPE frequency scaling factor
    LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
    // Functions to access the model's GGUF metadata scalar values
    // - The functions return the length of the string on success, or -1 on failure
    // - The output string is always null-terminated and cleared on failure
    // - GGUF array values are not supported by these functions
    // Get metadata value as a string by key name
    LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
    // Get the number of metadata key/value pairs
    LLAMA_API int llama_model_meta_count(const struct llama_model * model);
    // Get metadata key name by index
    LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
    // Get metadata value as a string by index
    LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
    // Get a string describing the model type
    LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
@ -344,9 +389,60 @@ extern "C" {
    // KV cache
    //
-    // Returns the number of tokens in the KV cache
+    // Information associated with an individual cell in the KV cache view.
-    LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx),
+    struct llama_kv_cache_view_cell {
-            "avoid using this, it will be removed in the future, instead - count the tokens in user code");
+        // The position for this cell. Takes KV cache shifts into account.
        // May be negative if the cell is not populated.
        llama_pos pos;
    };
    // An updateable view of the KV cache.
    struct llama_kv_cache_view {
        // Number of KV cache cells. This will be the same as the context size.
        int32_t n_cells;
        // Maximum number of sequences that can exist in a cell. It's not an error
        // if there are more sequences in a cell than this value, however they will
        // not be visible in the view cells_sequences.
        int32_t n_max_seq;
        // Number of tokens in the cache. For example, if there are two populated
        // cells, the first with 1 sequence id in it and the second with 2 sequence
        // ids then you'll have 3 tokens.
        int32_t token_count;
        // Number of populated cache cells.
        int32_t used_cells;
        // Maximum contiguous empty slots in the cache.
        int32_t max_contiguous;
        // Index to the start of the max_contiguous slot range. Can be negative
        // when cache is full.
        int32_t max_contiguous_idx;
        // Information for an individual cell.
        struct llama_kv_cache_view_cell * cells;
        // The sequences for each cell. There will be n_max_seq items per cell.
        llama_seq_id * cells_sequences;
    };
    // Create an empty KV cache view. (use only for debugging purposes)
    LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
    // Free a KV cache view. (use only for debugging purposes)
    LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
    // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
    LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
    // Returns the number of tokens in the KV cache (slow, use only for debug)
    // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
    LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
    // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
    LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx);
    // Clear the KV cache
    LLAMA_API void llama_kv_cache_clear(
@ -517,6 +613,12 @@ extern "C" {
    LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
    LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
    // Returns -1 if unknown, 1 for true or 0 for false.
    LLAMA_API int         llama_add_bos_token(const struct llama_model * model);
    // Returns -1 if unknown, 1 for true or 0 for false.
    LLAMA_API int         llama_add_eos_token(const struct llama_model * model);
    // codellama infill tokens
    LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
    LLAMA_API llama_token llama_token_middle(const struct llama_model * model); // Beginning of infill middle
--- a/examples/talk-llama/talk-llama.cpp
+++ b/examples/talk-llama/talk-llama.cpp
@ -53,6 +53,7 @@ struct whisper_params {
    int32_t capture_id = -1;
    int32_t max_tokens = 32;
    int32_t audio_ctx  = 0;
    int32_t n_gpu_layers = 999;
    float vad_thold  = 0.6f;
    float freq_thold = 100.0f;
@ -63,6 +64,7 @@ struct whisper_params {
    bool print_energy   = false;
    bool no_timestamps  = true;
    bool verbose_prompt = false;
    bool use_gpu        = true;
    std::string person      = "Georgi";
    std::string language    = "en";
@ -89,13 +91,15 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-c"   || arg == "--capture")        { params.capture_id     = std::stoi(argv[++i]); }
        else if (arg == "-mt"  || arg == "--max-tokens")     { params.max_tokens     = std::stoi(argv[++i]); }
        else if (arg == "-ac"  || arg == "--audio-ctx")      { params.audio_ctx      = std::stoi(argv[++i]); }
        else if (arg == "-ngl" || arg == "--n-gpu-layers")   { params.n_gpu_layers   = std::stoi(argv[++i]); }
        else if (arg == "-vth" || arg == "--vad-thold")      { params.vad_thold      = std::stof(argv[++i]); }
        else if (arg == "-fth" || arg == "--freq-thold")     { params.freq_thold     = std::stof(argv[++i]); }
        else if (arg == "-su"  || arg == "--speed-up")       { params.speed_up       = true; }
        else if (arg == "-tr"  || arg == "--translate")      { params.translate      = true; }
        else if (arg == "-ps"  || arg == "--print-special")  { params.print_special  = true; }
        else if (arg == "-pe"  || arg == "--print-energy")   { params.print_energy   = true; }
-        else if (arg == "--verbose-prompt")                 { params.verbose_prompt = true; }
+        else if (arg == "-vp"  || arg == "--verbose-prompt") { params.verbose_prompt = true; }
        else if (arg == "-ng"  || arg == "--no-gpu")         { params.use_gpu        = false; }
        else if (arg == "-p"   || arg == "--person")         { params.person         = argv[++i]; }
        else if (arg == "--session")                         { params.path_session   = argv[++i];}
        else if (arg == "-l"   || arg == "--language")       { params.language       = argv[++i]; }
@ -110,6 +114,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
            }
        }
        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = argv[++i]; }
        else if (arg == "-ng"  || arg == "--no-gpu")        { params.use_gpu       = false; }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -131,12 +136,15 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -c ID,    --capture ID     [%-7d] capture device ID\n",                           params.capture_id);
    fprintf(stderr, "  -mt N,    --max-tokens N   [%-7d] maximum number of tokens per audio chunk\n",    params.max_tokens);
    fprintf(stderr, "  -ac N,    --audio-ctx N    [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
    fprintf(stderr, "  -ngl N,   --n-gpu-layers N [%-7d] number of layers to store in VRAM\n",           params.n_gpu_layers);
    fprintf(stderr, "  -vth N,   --vad-thold N    [%-7.2f] voice activity detection threshold\n",        params.vad_thold);
    fprintf(stderr, "  -fth N,   --freq-thold N   [%-7.2f] high-pass frequency cutoff\n",                params.freq_thold);
    fprintf(stderr, "  -su,      --speed-up       [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
    fprintf(stderr, "  -tr,      --translate      [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,      --print-special  [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,      --print-energy   [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
    fprintf(stderr, "  -vp,      --verbose-prompt [%-7s] print prompt at start\n",                       params.verbose_prompt ? "true" : "false");
    fprintf(stderr, "  -ng,      --no-gpu         [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "  -p NAME,  --person NAME    [%-7s] person name (for prompt selection)\n",          params.person.c_str());
    fprintf(stderr, "  -l LANG,  --language LANG  [%-7s] spoken language\n",                             params.language.c_str());
    fprintf(stderr, "  -mw FILE, --model-whisper  [%-7s] whisper model file\n",                          params.model_wsp.c_str());
@ -144,7 +152,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -s FILE,  --speak TEXT     [%-7s] command for TTS\n",                             params.speak.c_str());
    fprintf(stderr, "  --prompt-file FNAME        [%-7s] file with custom prompt to start dialog\n",     "");
    fprintf(stderr, "  --session FNAME                   file to cache model state in (may be large!) (default: none)\n");
-    fprintf(stderr, "  --verbose-prompt          [%-7s] print prompt at start\n",                       params.verbose_prompt ? "true" : "false");
    fprintf(stderr, "  -f FNAME, --file FNAME     [%-7s] text output file name\n",                       params.fname_out.c_str());
    fprintf(stderr, "\n");
 }
@ -244,7 +251,7 @@ int main(int argc, char ** argv) {
        return 1;
    }
-    if (whisper_lang_id(params.language.c_str()) == -1) {
+    if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
        whisper_print_usage(argc, argv, params);
        exit(0);
@ -252,13 +259,21 @@ int main(int argc, char ** argv) {
    // whisper init
-    struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
+    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
    // llama init
    llama_backend_init(true);
    auto lmparams = llama_model_default_params();
    if (!params.use_gpu) {
        lmparams.n_gpu_layers = 0;
    } else {
        lmparams.n_gpu_layers = params.n_gpu_layers;
    }
    struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);
@ -267,7 +282,6 @@ int main(int argc, char ** argv) {
    // tune these to your liking
    lcparams.n_ctx      = 2048;
    lcparams.seed       = 1;
-    lcparams.f16_kv     = true;
    lcparams.n_threads  = params.n_threads;
    struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);
@ -671,8 +685,8 @@ int main(int argc, char ** argv) {
                    }
                }
-                text_to_speak = ::replace(text_to_speak, "\"", "");
+                text_to_speak = ::replace(text_to_speak, "'", "'\"'\"'");
-                int ret = system((params.speak + " " + std::to_string(voice_id) + " \"" + text_to_speak + "\"").c_str());
+                int ret = system((params.speak + " " + std::to_string(voice_id) + " '" + text_to_speak + "'").c_str());
                if (ret != 0) {
                    fprintf(stderr, "%s: failed to speak\n", __func__);
                }
--- a/examples/talk.wasm/emscripten.cpp
+++ b/examples/talk.wasm/emscripten.cpp
@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                if (g_contexts[i] != nullptr) {
                    g_running = true;
                    if (g_worker.joinable()) {
--- a/examples/talk.wasm/gpt-2.cpp
+++ b/examples/talk.wasm/gpt-2.cpp
@ -155,33 +155,33 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
        const int n_ctx   = hparams.n_ctx;
        const int n_vocab = hparams.n_vocab;
-        ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // ln_f_g
+        ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_g
-        ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // ln_f_b
+        ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_b
-        ctx_size += n_vocab*n_embd*ggml_type_sizef(wtype);         // wte
+        ctx_size += n_vocab*ggml_row_size(wtype, n_embd);         // wte
-        ctx_size +=   n_ctx*n_embd*ggml_type_sizef(GGML_TYPE_F32); // wpe
+        ctx_size +=   n_ctx*ggml_row_size(GGML_TYPE_F32, n_embd); // wpe
-        ctx_size += n_vocab*n_embd*ggml_type_sizef(wtype);         // lm_head
+        ctx_size += n_vocab*ggml_row_size(wtype, n_embd);         // lm_head
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_g
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_b
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_2_g
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_g
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_2_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_b
-        ctx_size += n_layer*(3*n_embd*n_embd*ggml_type_sizef(wtype));         // c_attn_attn_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         3*n_embd*n_embd)); // c_attn_attn_w
-        ctx_size += n_layer*(       3*n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_attn_attn_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 3*n_embd));        // c_attn_attn_b
-        ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype));           // c_attn_proj_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         n_embd*n_embd)); // c_attn_proj_w
-        ctx_size += n_layer*(       n_embd*ggml_type_sizef(GGML_TYPE_F32));   // c_attn_proj_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd));        // c_attn_proj_b
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype));         // c_mlp_fc_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         4*n_embd*n_embd)); // c_mlp_fc_w
-        ctx_size += n_layer*(       4*n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd));        // c_mlp_fc_b
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype));         // c_mlp_proj_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         4*n_embd*n_embd)); // c_mlp_proj_w
-        ctx_size += n_layer*(         n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32,   n_embd));        // c_mlp_proj_b
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k
+        ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_k
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v
+        ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_v
        ctx_size += (6 + 12*n_layer)*256; // object overhead
@ -524,8 +524,7 @@ bool gpt2_eval(
            struct ggml_tensor * KQ_scaled =
                ggml_scale(ctx0,
                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head))
+                        1.0f/sqrt(float(n_embd)/n_head));
-                        );
            // KQ_masked = mask_past(KQ_scaled)
            // [n_past + N, N, 12]
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@ -121,13 +121,13 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
            return false;
        }
-        std::string word;
+        char word[129];
        for (int i = 0; i < n_vocab; i++) {
            uint32_t len;
            fin.read((char *) &len, sizeof(len));
-
+            word[len] = '\0';
-            word.resize(len);
+            fin.read((char *) word, len);
-            fin.read((char *) word.data(), len);
            vocab.token_to_id[word] = i;
            vocab.id_to_token[i] = word;
@ -155,33 +155,33 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
        const int n_ctx   = hparams.n_ctx;
        const int n_vocab = hparams.n_vocab;
-        ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // ln_f_g
+        ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_g
-        ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // ln_f_b
+        ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_b
-        ctx_size += n_vocab*n_embd*ggml_type_sizef(wtype);         // wte
+        ctx_size += n_vocab*ggml_row_size(wtype, n_embd);         // wte
-        ctx_size +=   n_ctx*n_embd*ggml_type_sizef(GGML_TYPE_F32); // wpe
+        ctx_size +=   n_ctx*ggml_row_size(GGML_TYPE_F32, n_embd); // wpe
-        ctx_size += n_vocab*n_embd*ggml_type_sizef(wtype);         // lm_head
+        ctx_size += n_vocab*ggml_row_size(wtype, n_embd);         // lm_head
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_g
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_b
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_2_g
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_g
-        ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ln_2_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_b
-        ctx_size += n_layer*(3*n_embd*n_embd*ggml_type_sizef(wtype));         // c_attn_attn_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         3*n_embd*n_embd)); // c_attn_attn_w
-        ctx_size += n_layer*(       3*n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_attn_attn_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 3*n_embd));        // c_attn_attn_b
-        ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype));           // c_attn_proj_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         n_embd*n_embd)); // c_attn_proj_w
-        ctx_size += n_layer*(       n_embd*ggml_type_sizef(GGML_TYPE_F32));   // c_attn_proj_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd));        // c_attn_proj_b
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype));         // c_mlp_fc_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         4*n_embd*n_embd)); // c_mlp_fc_w
-        ctx_size += n_layer*(       4*n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd));        // c_mlp_fc_b
-        ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype));         // c_mlp_proj_w
+        ctx_size += n_layer*(ggml_row_size(wtype,         4*n_embd*n_embd)); // c_mlp_proj_w
-        ctx_size += n_layer*(         n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b
+        ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32,   n_embd));        // c_mlp_proj_b
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k
+        ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_k
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v
+        ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_v
        ctx_size += (6 + 12*n_layer)*256; // object overhead
@ -525,8 +525,7 @@ bool gpt2_eval(
            struct ggml_tensor * KQ_scaled =
                ggml_scale(ctx0,
                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head))
+                        1.0f/sqrt(float(n_embd)/n_head));
-                        );
            // KQ_masked = mask_past(KQ_scaled)
            // [n_past + N, N, 12]
--- a/examples/talk/talk.cpp
+++ b/examples/talk/talk.cpp
@ -31,6 +31,7 @@ struct whisper_params {
    bool print_special = false;
    bool print_energy  = false;
    bool no_timestamps = true;
    bool use_gpu       = true;
    std::string person    = "Santa";
    std::string language  = "en";
@ -61,6 +62,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
        else if (arg == "-ng"  || arg == "--no-gpu")        { params.use_gpu       = false; }
        else if (arg == "-p"   || arg == "--person")        { params.person        = argv[++i]; }
        else if (arg == "-l"   || arg == "--language")      { params.language      = argv[++i]; }
        else if (arg == "-mw"  || arg == "--model-whisper") { params.model_wsp     = argv[++i]; }
@ -94,6 +96,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -tr,      --translate     [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,      --print-special [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,      --print-energy  [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
    fprintf(stderr, "  -ng,      --no-gpu        [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "  -p NAME,  --person NAME   [%-7s] person name (for prompt selection)\n",          params.person.c_str());
    fprintf(stderr, "  -l LANG,  --language LANG [%-7s] spoken language\n",                             params.language.c_str());
    fprintf(stderr, "  -mw FILE, --model-whisper [%-7s] whisper model file\n",                          params.model_wsp.c_str());
@ -181,8 +184,10 @@ int main(int argc, char ** argv) {
    }
    // whisper init
    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
-    struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
+    struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
    // gpt init
--- a/examples/twitch.sh
+++ b/examples/twitch.sh
@ -21,7 +21,7 @@ help()
    echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
    echo "options:"
    echo "-s       Step in seconds (default is $step)."
-    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large' (default is '$model')."
+    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
    echo "-t       Number of threads to use."
    echo "-h       Print this help page."
    echo
--- a/examples/wchess/CMakeLists.txt
+++ b/examples/wchess/CMakeLists.txt
@ -0,0 +1,9 @@
 # wchess example: the shared library in libwchess plus one front-end per platform.
 set(CMAKE_CXX_STANDARD 11)
 add_subdirectory(libwchess)
 # Emscripten builds use the wchess.wasm front-end; every other build uses wchess.cmd.
 if (EMSCRIPTEN)
    add_subdirectory(wchess.wasm)
 else()
    add_subdirectory(wchess.cmd)
 endif()
--- a/examples/wchess/README.md
+++ b/examples/wchess/README.md
@ -0,0 +1,45 @@
 # wchess
 Voice-controlled chess using Whisper
 Online demo: https://whisper.ggerganov.com/wchess/
 https://github.com/ggerganov/whisper.cpp/assets/1991296/c2b2f03c-9684-49f3-8106-357d2d4e67fa
 ## Command-line tool
 ```bash
 mkdir build && cd build
 cmake -DWHISPER_SDL2=1 ..
 make -j
 ./bin/wchess -m ../models/ggml-base.en.bin
 Move: start
 a b c d e f g h
 r n b q k b n r 8
 p p p p p p p p 7
 . * . * . * . * 6
 * . * . * . * . 5
 . * . * . * . * 4
 * . * . * . * . 3
 P P P P P P P P 2
 R N B Q K B N R 1
 White's turn
 [(l)isten/(p)ause/(q)uit]: 
 ```
 ## TODO
 - Fix bugs in the chess moves logic
 - Improve web-browser audio capture - sometimes it does not record the voice properly
 - Add support for more languages by making the generated grammar string multilingual
 - Explore ways to improve the dynamic grammar to be narrower
 PRs welcome!
 ## Thanks
 - [chessboardjs](https://chessboardjs.com) for the neat chessboard JS library used in this demo
--- a/examples/wchess/libwchess/CMakeLists.txt
+++ b/examples/wchess/libwchess/CMakeLists.txt
@ -0,0 +1,19 @@
 # Static library shared by the wchess front-ends.
 add_library(wchess-core STATIC
    WChess.cpp
    WChess.h
    Chessboard.cpp
    Chessboard.h
 )
 # PUBLIC: targets linking wchess-core also link whisper and common.
 target_link_libraries(wchess-core
    PUBLIC
    whisper
    common
 )
 # Expose this directory as an include path for in-tree consumers (build interface only).
 target_include_directories(wchess-core
    PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
 )
 # Disabled stand-alone test target for Chessboard.
 # add_executable(test-chessboard test-chessboard.cpp Chessboard.cpp)
--- a/examples/wchess/libwchess/Chessboard.cpp
+++ b/examples/wchess/libwchess/Chessboard.cpp
@ -0,0 +1,803 @@
 #include "Chessboard.h"
 #include <array>
 #include <vector>
 #include <algorithm>
 #include <cstring>
 #include <set>
 #include <list>
 #include <chrono>
 namespace {
 // Coordinate string of every square, indexed by square number.
 // Index = (digit - 1) * 8 + (letter - 'a'), so "a1" is 0 and "h8" is 63.
 constexpr std::array<const char*, 64> positions = {
    "a1", "b1", "c1", "d1", "e1", "f1", "g1", "h1",
    "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2",
    "a3", "b3", "c3", "d3", "e3", "f3", "g3", "h3",
    "a4", "b4", "c4", "d4", "e4", "f4", "g4", "h4",
    "a5", "b5", "c5", "d5", "e5", "f5", "g5", "h5",
    "a6", "b6", "c6", "d6", "e6", "f6", "g6", "h6",
    "a7", "b7", "c7", "d7", "e7", "f7", "g7", "h7",
    "a8", "b8", "c8", "d8", "e8", "f8", "g8", "h8",
 };
 // Sentinel square number (one past the last valid index) meaning "no square".
 // It must never be used to index `positions` or a Board.
 constexpr char INVALID_POS = positions.size();
 // NOTE: the names R/F (and the RANK/FILE macros below) are swapped relative to
 // usual chess terminology: R addresses the letter, F addresses the digit.
 constexpr int R = 0; // index of the letter ('a'..'h') inside a coordinate string
 constexpr int F = 1; // index of the digit ('1'..'8') inside a coordinate string
 #define FILE (c[F] - '1')
 #define RANK (c[R] - 'a')
 // Converts a coordinate such as "e4" to its square number (digit * 8 + letter).
 // Returns INVALID_POS when fewer than two characters are supplied or when either
 // of the first two characters is out of range; further characters are ignored.
 constexpr char operator ""_P(const char * c, size_t size) {
    return size < 2 || RANK < 0 || RANK > 7 ||
        FILE < 0 || FILE > 7 ? INVALID_POS : FILE * 8 + RANK;
 }
 #undef FILE
 #undef RANK
 // Minimal non-owning view over a character range (pointer + length).
 // The viewed characters are not required to be null-terminated.
 struct sview {
    const char * ptr = nullptr;
    size_t size = 0;
    sview() = default;
    sview(const char * p, size_t s) : ptr(p), size(s) {}
    sview(const std::string& s) : ptr(s.data()), size(s.size()) {}
    // Index of the first `del` at or after `pos`, or std::string::npos if there is none.
    size_t find(char del, size_t pos) {
        for (size_t i = pos; i < size; ++i) {
            if (ptr[i] == del) return i;
        }
        return std::string::npos;
    }
 };
 // Splits `str` at every `del`, skipping spaces that precede a token.
 // At least one token is always returned; the last token may be empty
 // (e.g. for input that ends with the delimiter).
 std::vector<sview> split(sview str, char del) {
    std::vector<sview> res;
    size_t cur = 0;
    size_t last = 0;
    while (cur != std::string::npos) {
        // Check the bound before reading: a view is not guaranteed to be
        // null-terminated, and a default-constructed view has a null pointer.
        if (last < str.size && str.ptr[last] == ' ') {
            ++last;
            continue;
        }
        cur = str.find(del, last);
        size_t len = cur == std::string::npos ? str.size - last : cur - last;
        res.emplace_back(str.ptr + last, len);
        last = cur + 1;
    }
    return res;
 }
 // Run-time counterpart of the _P literal: maps a coordinate view such as "e4"
 // to its square number, or INVALID_POS when it is not a valid coordinate.
 char strToPos(sview str) {
    const char square = operator ""_P(str.ptr, str.size);
    return square;
 }
 // Spoken piece names, indexed by Piece::Types (sizes are checked in Piece::name()).
 constexpr std::array<const char*, 6> pieceNames =  {
    "pawn", "knight", "bishop", "rook", "queen", "king",
 };
 // One-letter board symbols for black pieces, indexed by Piece::Types.
 static constexpr std::array<char, 6> blackShort =  {
    'p', 'n', 'b', 'r', 'q', 'k',
 };
 // One-letter board symbols for white pieces, indexed by Piece::Types.
 static constexpr std::array<char, 6> whiteShort =  {
    'P', 'N', 'B', 'R', 'Q', 'K',
 };
 // Maps a piece name ("pawn" .. "king") to its index in pieceNames.
 // Returns pieceNames.size() when `str` is not exactly one of the names.
 char strToType(sview str) {
    auto it = std::find_if(pieceNames.begin(), pieceNames.end(), [str] (const char* name) {
        // Compare lengths too: strncmp bounded by str.size alone would accept any
        // prefix, so "" would resolve to "pawn" and "k" to "knight".
        return strlen(name) == str.size && strncmp(name, str.ptr, str.size) == 0;
    });
    return it != pieceNames.end() ? it - pieceNames.begin() : pieceNames.size();
 }
 // Directions: a step expressed as {letter delta, digit delta}, i.e. indexed by R and F.
 // The eight compass directions are single-square steps; the three-letter
 // names (NNE, ENE, ...) are the eight knight jumps.
 using Direction = std::array<char, 2>;
 constexpr Direction N   = {(char)  0, (char)  1};
 constexpr Direction NNE = {(char)  1, (char)  2};
 constexpr Direction NE  = {(char)  1, (char)  1};
 constexpr Direction ENE = {(char)  2, (char)  1};
 constexpr Direction E   = {(char)  1, (char)  0};
 constexpr Direction ESE = {(char)  2, (char) -1};
 constexpr Direction SE  = {(char)  1, (char) -1};
 constexpr Direction SSE = {(char)  1, (char) -2};
 constexpr Direction S   = {(char)  0, (char) -1};
 constexpr Direction SSW = {(char) -1, (char) -2};
 constexpr Direction SW  = {(char) -1, (char) -1};
 constexpr Direction WSW = {(char) -2, (char) -1};
 constexpr Direction W   = {(char) -1, (char)  0};
 constexpr Direction WNW = {(char) -2, (char)  1};
 constexpr Direction NW  = {(char) -1, (char)  1};
 constexpr Direction NNW = {(char) -1, (char)  2};
 // Applies one step `d` to square `pos`. Returns the resulting square number,
 // or INVALID_POS when the step leaves the board. `pos` must be a valid square.
 char makeStep(char pos, const Direction& d) {
    const char * origin = positions[pos];
    char target[2];
    target[R] = char(origin[R] + d[R]);
    target[F] = char(origin[F] + d[F]);
    return strToPos(sview{target, sizeof(target)});
 }
 // Walks from `pos` in direction `d` for at most `count` steps.
 // After every step that stays on the board `m(square)` is invoked; the walk stops
 // as soon as it returns true. Returns the square where the walk stopped, which is
 // INVALID_POS if the walk left the board.
 template<class Modifier>
 char traverse(char pos, const Direction& d, const Modifier& m, int count = 8) {
    for (int remaining = count; remaining > 0; --remaining) {
        pos = makeStep(pos, d);
        if (pos == INVALID_POS) return pos;
        if (m(pos)) return pos;
    }
    return pos;
 }
 // Reduces a displacement to a unit step: each component becomes -1, 0 or +1
 // according to its sign.
 Direction normalize(const Direction& distance) {
    auto sign = [] (char v) { return v > 0 ? 1 : (v < 0 ? -1 : 0); };
    return {char(sign(distance[R])), char(sign(distance[F]))};
 }
 // A pin: `pinner` attacks the enemy king through `pinned` along direction `d`
 // (d points from the pinner towards the king, see Chessboard::updatePins).
 struct Pin {
    Direction d;
    Piece* pinner;
    Piece* pinned;
 };
 using Pins = std::list<Pin>;
 // Square number -> piece standing there, or nullptr for an empty square.
 using Board = std::array<Piece*, 64>;
 // Restricts `directions` to those a pinned piece may still move along.
 // `pin` is the pin direction, or {0, 0} when the piece is not pinned, in which
 // case every direction is returned. A pinned piece may only move along the pin
 // line, i.e. in direction `pin` or exactly opposite to it.
 std::vector<Direction> filter(const Direction& pin, std::initializer_list<Direction> directions) {
    if (pin[R] == 0 && pin[F] == 0) return directions;
    std::vector<Direction> result;
    for (auto& d : directions) {
        // Both components must flip together: testing them independently would
        // also accept the perpendicular diagonal (e.g. SE/NW for a NE pin).
        const bool along    = d[R] ==  pin[R] && d[F] ==  pin[F];
        const bool opposite = d[R] == -pin[R] && d[F] == -pin[F];
        if (along || opposite) result.push_back(d);
    }
    return result;
 }
 }
 // Base class of all chess pieces. Holds the piece identity (type, colour), its
 // current square and the cached set of squares it may currently move to.
 class Piece {
 public:
    // Piece kinds; the values index pieceNames / blackShort / whiteShort.
    enum Types : char {
        Pawn,
        Knight,
        Bishop,
        Rook,
        Queen,
        King,
        // number of piece kinds; must stay last
        NUM_PIECES
    };
    // White is 0 and Black is 1, so a Colors value is used directly as a condition.
    enum Colors : char {
        White,
        Black,
    };
    // Spoken name of the piece kind (entry of pieceNames).
    const char* name() const;
    // One-letter board symbol: upper case for white, lower case for black.
    char initial() const;
    Types type() const { return m_type; }
    Colors color() const { return m_color; }
    // Current square number, or INVALID_POS once the piece has been taken.
    char pos() const { return m_pos; }
    // Moves the piece and marks its cached moves as stale.
    void setPos(char pos) {
        m_pos = pos;
        invalidate();
    }
    // Coordinate string of the current square ("" when the piece has been taken).
    const char* coord() const;
    // Cached destination squares, as computed by the last effective reinit().
    const std::set<char>& allowed() const { return m_allowed; }
    // True when `pos` matches the move pattern and is in the cached move set.
    bool canReach(char pos) const;
    // Pure geometry test: could this kind of piece go from its square to `pos`
    // on an empty board? `pos` must be a valid square number.
    virtual bool movePattern(char pos) const = 0;
    // Marks the piece as captured: position becomes INVALID_POS, moves are cleared.
    void take();
    // Recomputes the cached move set from `state` if it was invalidated.
    virtual void reinit(const State& state) = 0;
    // Requests a recomputation on the next reinit().
    void invalidate();
 protected:
    Piece(Types type, Colors color, char pos, std::set<char> allowed)
        : m_type(type), m_color(color), m_pos(pos), m_allowed(std::move(allowed)) {}
    Piece(const Piece&) = delete;
    ~Piece() = default;
    const Types m_type;
    const Colors m_color;
    char m_pos;
    std::set<char> m_allowed;
    // Set by invalidate(); reinit() implementations do nothing while it is false.
    bool m_update = false;
 };
 struct Pawn : public Piece {
    Pawn(Colors color, char pos, std::set<char> next) : Piece(Types::Pawn, color, pos, std::move(next)) {}
    bool is_first_move() const {
        return m_color ? coord()[F] == '7' : coord()[F] == '2';
    }
    virtual bool movePattern(char pos) const override {
        if (m_pos == INVALID_POS) return false;
        auto cur = coord();
        auto next = positions[pos];
        Direction distance = {char(next[R] - cur[R]), char(next[F] - cur[F])};
        char forward = m_color ? -1 : 1;
        return (forward == distance[F] && distance[R] * distance[R] <= 1)
            || (is_first_move() && 2 * forward == distance[F] && distance[R] == 0);
    }
    virtual void reinit(const State& state) override;
 };
 // Knight: jumps so that the squared distance to the target is exactly 5 (1x2 or 2x1).
 struct Knight : public Piece {
    Knight(Colors color, char pos, std::set<char> next) : Piece(Types::Knight, color, pos, std::move(next)) {}
    virtual bool movePattern(char pos) const override {
        if (m_pos == INVALID_POS) return false;
        const char * here = coord();
        const char * there = positions[pos];
        const int dc = there[R] - here[R];
        const int dr = there[F] - here[F];
        return dc * dc + dr * dr == 5;
    }
    virtual void reinit(const State& state) override;
 };
 // Bishop: the target must lie on one of the two diagonals through the current square
 // (the current square itself also satisfies the pattern).
 struct Bishop : public Piece {
    Bishop(Colors color, char pos) : Piece(Types::Bishop, color, pos, {}) {}
    virtual bool movePattern(char pos) const override {
        if (m_pos == INVALID_POS) return false;
        const char * here = coord();
        const char * there = positions[pos];
        const int dc = there[R] - here[R];
        const int dr = there[F] - here[F];
        return dc == dr || dc == -dr;
    }
    virtual void reinit(const State& state) override;
 };
 // Rook: the target must share the letter or the digit of the current square
 // (the current square itself also satisfies the pattern).
 struct Rook : public Piece {
    Rook(Colors color, char pos) : Piece(Types::Rook, color, pos, {}) {}
    virtual bool movePattern(char pos) const override {
        if (m_pos == INVALID_POS) return false;
        const char * here = coord();
        const char * there = positions[pos];
        const bool sameLetter = here[R] == there[R];
        const bool sameDigit = here[F] == there[F];
        return sameLetter || sameDigit;
    }
    virtual void reinit(const State& state) override;
 };
 // Queen: union of the rook pattern (same letter or digit) and the bishop pattern (diagonals).
 struct Queen : public Piece {
    Queen(Colors color, char pos) : Piece(Types::Queen, color, pos, {}) {}
    virtual bool movePattern(char pos) const override {
        if (m_pos == INVALID_POS) return false;
        const char * here = coord();
        const char * there = positions[pos];
        const int dc = there[R] - here[R];
        const int dr = there[F] - here[F];
        const bool straight = dc == 0 || dr == 0;
        const bool diagonal = dc == dr || dc == -dr;
        return straight || diagonal;
    }
    virtual void reinit(const State& state) override;
 };
 // King: any square at most one step away in each axis
 // (squared distance <= 2, which includes the current square).
 struct King : public Piece {
    King(Colors color, char pos) : Piece(Types::King, color, pos, {}) {}
    virtual bool movePattern(char pos) const override {
        if (m_pos == INVALID_POS) return false;
        const char * here = coord();
        const char * there = positions[pos];
        const int dc = there[R] - here[R];
        const int dr = there[F] - here[F];
        return dc * dc + dr * dr <= 2;
    }
    virtual void reinit(const State& state) override;
 };
 struct PieceSet {
    Piece* begin() { return &p1; }
    Piece* end() { return &r2 + 1; }
    const Piece* begin() const { return &p1; }
    const Piece* end() const { return &r2 + 1; }
    Piece& operator[](int i) { return *(begin() + i); }
    const Piece& operator[](int i) const { return *(begin() + i); }
    Pawn   p1;
    Pawn   p2;
    Pawn   p3;
    Pawn   p4;
    Pawn   p5;
    Pawn   p6;
    Pawn   p7;
    Pawn   p8;
    Rook   r1;
    Knight n1;
    Bishop b1;
    Queen  q;
    King   k;
    Bishop b2;
    Knight n2;
    Rook   r2;
 };
 // Complete game state: both piece sets, the square -> piece lookup table and
 // the currently known pins per colour of the pinned piece.
 struct State {
    // Sets up the standard starting position.
    State();
    PieceSet blacks;
    PieceSet whites;
    // Holds pointers into `blacks` / `whites`; nullptr marks an empty square.
    Board board;
    // Pins whose pinned piece is black (see findPin / Chessboard::updatePins).
    Pins blackPins;
    // Pins whose pinned piece is white.
    Pins whitePins;
 };
 // Returns the direction along which `piece` is pinned, or {0, 0} if it is not pinned.
 Direction findPin(const Piece& piece, const State& state) {
    const Pins& candidates = piece.color() ? state.blackPins : state.whitePins;
    for (const Pin& entry : candidates) {
        if (entry.pinned == &piece) return entry.d;
    }
    return {0, 0};
 }
 // traverse() modifier: stops the walk at the first occupied square.
 struct Find {
    Find(const Board& board) : m_board(board) {}
    bool operator() (char pos) const {
        return m_board[pos] != nullptr;
    }
    const Board& m_board;
 };
 struct Add {
    Add(const Board& board, std::set<char>& moves, Piece::Colors color) : m_board(board), m_moves(moves), m_color(color) {}
    bool operator() (char pos) const {
        if (!m_board[pos] || m_board[pos]->color() != m_color) m_moves.insert(pos);
        return m_board[pos];
    }
    const Board& m_board;
    std::set<char>& m_moves;
    Piece::Colors m_color;
 };
 void Pawn::reinit(const State& state) {
    if (m_pos == INVALID_POS) return;
    if (!m_update) return;
    m_update = false;
    m_allowed.clear();
    auto pin = findPin(*this, state);
    auto & left = m_color ? SW : NW;
    auto & right = m_color ? SE : NE;
    for (auto& direction : filter(pin, { left, right })) {
        auto pos = makeStep(m_pos, direction);
        if (pos != INVALID_POS && state.board[pos] && state.board[pos]->color() != m_color) m_allowed.insert(pos);
    }
    auto & forward = m_color ? S : N;
    if (!filter(pin, {forward}).empty()) {
        traverse(m_pos, forward, [&] (char pos) {
                if (!state.board[pos]) m_allowed.insert(pos);
                return state.board[pos] || !is_first_move();
            }, 2);
    }
 }
 void Knight::reinit(const State& state) {
    if (m_pos == INVALID_POS) return;
    if (!m_update) return;
    m_update = false;
    m_allowed.clear();
    auto pin = findPin(*this, state);
    if (pin[R] != 0 || pin[F] != 0) return;
    for (auto& direction : { NNE, ENE, ESE, SSE, SSW, WSW, WNW, NNW }) {
        auto pos = makeStep(m_pos, direction);
        if (pos != INVALID_POS && (!state.board[pos] || state.board[pos]->color() != m_color)) m_allowed.insert(pos);
    }
 }
 void Bishop::reinit(const State& state) {
    if (m_pos == INVALID_POS) return;
    if (!m_update) return;
    m_update = false;
    m_allowed.clear();
    auto pin = findPin(*this, state);
    for (auto& direction : filter(pin, { NE, SE, SW, NW })) {
        traverse(m_pos, direction, Add(state.board, m_allowed, m_color));
    }
 }
 void Rook::reinit(const State& state) {
    if (m_pos == INVALID_POS) return;
    if (!m_update) return;
    m_update = false;
    m_allowed.clear();
    auto pin = findPin(*this, state);
    for (auto& direction : filter(pin, { N, E, S, W })) {
        traverse(m_pos, direction, Add(state.board, m_allowed, m_color));
    }
 }
 void Queen::reinit(const State& state) {
    if (m_pos == INVALID_POS) return;
    if (!m_update) return;
    m_update = false;
    m_allowed.clear();
    auto pin = findPin(*this, state);
    for (auto& direction : filter(pin, { N, NE, E, SE, S, SW, W, NW })) {
        traverse(m_pos, direction, Add(state.board, m_allowed, m_color));
    }
 }
 // Recomputes the king's moves if it is on the board and was invalidated.
 // A neighbouring square is allowed when it is on the board, not occupied by an
 // own piece and not attacked by any enemy piece after the king has moved there.
 void King::reinit(const State& state) {
    if (m_pos == INVALID_POS) return;
    if (!m_update) return;
    m_update = false;
    m_allowed.clear();
    auto& enemyPieces = m_color ? state.whites : state.blacks;
    auto& pawnAttackLeft = m_color ? SW : NW;
    auto& pawnAttackRight = m_color ? SE : NE;
    const char kingPos = m_pos;
    for (auto& direction : { N, NE, E, SE, S, SW, W, NW }) {
        auto pos = makeStep(m_pos, direction);
        bool accept = pos != INVALID_POS && !(state.board[pos] && state.board[pos]->color() == m_color);
        if (accept) {
            for (auto& p : enemyPieces) {
                // A piece standing on the target square is captured by the move and
                // therefore cannot attack it; whether it is defended is decided by
                // the remaining enemy pieces.
                if (p.pos() == pos) continue;
                if (!p.movePattern(pos)) continue;
                if (p.type() == Piece::Knight || p.type() == Piece::King) {
                    accept = false;
                    break;
                }
                else if (p.type() == Piece::Pawn) {
                    // pawns only attack diagonally, not along their push direction
                    auto from = positions[pos];
                    auto to = p.coord();
                    Direction d {char(to[R] - from[R]), char(to[F] - from[F])};
                    if (d == pawnAttackLeft || d == pawnAttackRight) {
                        accept = false;
                        break;
                    }
                }
                else {
                    // sliding piece: attacked when nothing stands between the target
                    // square and the piece. The king's current square is treated as
                    // empty because the king will have left it, so it cannot shield
                    // squares behind it on the same line.
                    auto from = positions[pos];
                    auto to = p.coord();
                    Direction d = normalize({char(to[R] - from[R]), char(to[F] - from[F])});
                    auto reached = traverse(pos, d, [&] (char sq) -> bool { return sq != kingPos && state.board[sq]; });
                    if (p.pos() == reached) {
                        accept = false;
                        break;
                    }
                }
            }
        }
        if (accept) m_allowed.insert(pos);
    }
 }
 // Spoken name of this piece kind, e.g. "knight".
 const char* Piece::name() const {
    static_assert(pieceNames.size() == Piece::NUM_PIECES, "Mismatch between piece names and types");
    const char* const label = pieceNames[m_type];
    return label;
 }
 // One-letter board symbol: lower case for black, upper case for white.
 char Piece::initial() const {
    static_assert(blackShort.size() == Piece::NUM_PIECES, "Mismatch between piece names and types");
    static_assert(whiteShort.size() == Piece::NUM_PIECES, "Mismatch between piece names and types");
    const std::array<char, 6>& symbols = m_color ? blackShort : whiteShort;
    return symbols[m_type];
 }
 // Marks the cached move set as stale; the next reinit() call recomputes it.
 void Piece::invalidate() {
    m_update = true;
 }
 // Coordinate string of the current square, or "" when the piece has been taken.
 const char* Piece::coord() const {
    return m_pos == INVALID_POS ? "" : positions[m_pos];
 }
 // True when `pos` matches this piece's move pattern and is in its cached move set.
 bool Piece::canReach(char pos) const {
    if (!movePattern(pos)) return false;
    return m_allowed.find(pos) != m_allowed.end();
 }
 // Marks the piece as captured: it no longer has a square or any moves.
 // The board entry itself is updated by the caller.
 void Piece::take() {
    m_allowed.clear();
    m_pos = INVALID_POS;
 }
 // Builds the standard starting position.
 // Each piece set lists the eight pawns (columns a-h) followed by the back row
 // (columns a-h), matching the member order of PieceSet. Pawns and knights are
 // given their opening moves; the other pieces start with no moves.
 State::State()
    : blacks {
        {Piece::Black, "a7"_P, {"a5"_P, "a6"_P} },
        {Piece::Black, "b7"_P, {"b5"_P, "b6"_P} },
        {Piece::Black, "c7"_P, {"c5"_P, "c6"_P} },
        {Piece::Black, "d7"_P, {"d5"_P, "d6"_P} },
        {Piece::Black, "e7"_P, {"e5"_P, "e6"_P} },
        {Piece::Black, "f7"_P, {"f5"_P, "f6"_P} },
        {Piece::Black, "g7"_P, {"g5"_P, "g6"_P} },
        {Piece::Black, "h7"_P, {"h5"_P, "h6"_P} },
        {Piece::Black, "a8"_P},
        {Piece::Black, "b8"_P, {"a6"_P, "c6"_P} },
        {Piece::Black, "c8"_P},
        {Piece::Black, "d8"_P},
        {Piece::Black, "e8"_P},
        {Piece::Black, "f8"_P},
        {Piece::Black, "g8"_P, {"f6"_P, "h6"_P} },
        {Piece::Black, "h8"_P},
    }
    , whites {
        {Piece::White, "a2"_P, {"a3"_P, "a4"_P} },
        {Piece::White, "b2"_P, {"b3"_P, "b4"_P} },
        {Piece::White, "c2"_P, {"c3"_P, "c4"_P} },
        {Piece::White, "d2"_P, {"d3"_P, "d4"_P} },
        {Piece::White, "e2"_P, {"e3"_P, "e4"_P} },
        {Piece::White, "f2"_P, {"f3"_P, "f4"_P} },
        {Piece::White, "g2"_P, {"g3"_P, "g4"_P} },
        {Piece::White, "h2"_P, {"h3"_P, "h4"_P} },
        {Piece::White, "a1"_P},
        {Piece::White, "b1"_P, {"a3"_P, "c3"_P} },
        {Piece::White, "c1"_P},
        {Piece::White, "d1"_P},
        {Piece::White, "e1"_P},
        {Piece::White, "f1"_P},
        {Piece::White, "g1"_P, {"f3"_P, "h3"_P} },
        {Piece::White, "h1"_P},
    }
    // Board rows are listed from row 1 (white back row) up to row 8 (black back row),
    // i.e. in square-number order; set indices 8-15 are the back-row pieces.
    , board {{
        &whites[ 8],  &whites[ 9],  &whites[10],  &whites[11],  &whites[12],  &whites[13],  &whites[14],  &whites[15],
        &whites[ 0],  &whites[ 1],  &whites[ 2],  &whites[ 3],  &whites[ 4],  &whites[ 5],  &whites[ 6],  &whites[ 7],
        nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,
        nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,
        nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,
        nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,      nullptr,
        &blacks[ 0],  &blacks[ 1],  &blacks[ 2],  &blacks[ 3],  &blacks[ 4],  &blacks[ 5],  &blacks[ 6],  &blacks[ 7],
        &blacks[ 8],  &blacks[ 9],  &blacks[10],  &blacks[11],  &blacks[12],  &blacks[13],  &blacks[14],  &blacks[15],
    }}
 {}
 // Creates a board in the starting position and builds the grammar for the first move.
 Chessboard::Chessboard()
    : m_state(new State())
 {
    setGrammar();
 }
 // Defined here rather than in the header: State is only forward-declared there,
 // and std::unique_ptr<State> needs the complete type to destroy it.
 Chessboard::~Chessboard() = default;
 // Stores the prompt and rebuilds the grammar, whose "move" rule depends on
 // whether a prompt is set.
 void Chessboard::setPrompt(const std::string& prompt) {
    m_prompt = prompt;
    setGrammar();
 }
 // Rebuilds m_grammar for the side to move.
 // The grammar lists the piece names, source squares and destination squares that
 // currently take part in at least one allowed move. While in check, pieces other
 // than the king only contribute moves contained in m_allowedInCheck.
 // m_grammar is left empty when the side to move has no such move.
 void Chessboard::setGrammar() {
    m_grammar.clear();
    std::string result;
    if (m_prompt.empty()) {
        result += "move ::= \" \" ((piece | frompos) \" \" \"to \"?)? topos\n";
        //result += "move ::= \" \" frompos \" \" \"to \"? topos\n";
    }
    else {
        // result += "move ::= prompt \" \" ((piece | frompos) \" \" \"to \"?)? topos\n"
        result += "move ::= prompt \" \" frompos \" \" \"to \"? topos\n"
        "prompt ::= \" " + m_prompt + "\"\n";
    }
    std::set<Piece::Types> pieceTypes;
    std::set<char> from_pos;
    std::set<char> to_pos;
    auto& pieces =  m_moveCounter % 2 ? m_state->blacks : m_state->whites;
    for (auto& p : pieces) {
        if (p.allowed().empty()) continue;
        bool addPiece = false;
        if (!m_inCheck || p.type() == Piece::King) {
            // the move set is known to be non-empty here
            to_pos.insert(p.allowed().begin(), p.allowed().end());
            addPiece = true;
        }
        else {
            for (auto move : p.allowed()) {
                if (m_allowedInCheck.count(move)) {
                    to_pos.insert(move);
                    addPiece = true;
                }
            }
        }
        if (addPiece) {
            pieceTypes.insert(p.type());
            from_pos.insert(p.pos());
        }
    }
    // no movable piece: keep the grammar empty (from_pos / to_pos are empty as well)
    if (pieceTypes.empty()) return;
    // every alternative is emitted with a trailing '|', the last one is popped again
    result += "piece ::= (";
    for (auto& p : pieceTypes) result += " \"" + std::string(pieceNames[p]) + "\" |";
    result.pop_back();
    result += ")\n\n";
    result += "frompos ::= (";
    for (auto& p : from_pos) result += " \"" + std::string(positions[p]) + "\" |";
    result.pop_back();
    result += ")\n";
    result += "topos ::= (";
    for (auto& p : to_pos) result += " \"" + std::string(positions[p]) + "\" |";
    result.pop_back();
    result += ")\n";
    m_grammar = std::move(result);
 }
 std::string Chessboard::stringifyBoard() {
    std::string result;
    result.reserve(16 + 2 * 64 + 16);
    for (char rank = 'a'; rank <= 'h'; ++rank) {
        result.push_back(rank);
        result.push_back(' ');
    }
    result.back() = '\n';
    for (int i = 7; i >= 0; --i) {
        for (int j = 0; j < 8; ++j) {
            auto p = m_state->board[i * 8 + j];
            if (p) result.push_back(p->initial());
            else result.push_back((i + j) % 2 ? '.' : '*');
            result.push_back(' ');
        }
        result.push_back('0' + i + 1);
        result.push_back('\n');
    }
    return result;
 }
 // Parses and executes one spoken move for the side to move.
 // Returns "" when the command cannot be parsed or the move is not allowed.
 // Otherwise returns the move as "<from>-<to>" (e.g. "e2-e4"), with '#' appended
 // when the opponent is left without any move (the new grammar is empty).
 std::string Chessboard::process(const std::string& command) {
    const auto t_start = std::chrono::high_resolution_clock::now();
    auto color = Piece::Colors(m_moveCounter % 2);
    Piece* piece = nullptr;
    auto pos_to = INVALID_POS;
    if (!parseCommand(command, piece, pos_to)) return "";
    // remember the origin before move() changes the piece position
    auto pos_from = piece->pos();
    if (!move(*piece, pos_to)) return "";
    // The following steps still refer to the side that just moved, so they must
    // run before m_moveCounter is incremented.
    flagUpdates(pos_from, pos_to);
    detectChecks();
    auto& enemyPieces = color ? m_state->whites : m_state->blacks;
    for (auto& p : enemyPieces) p.reinit(*m_state); // only enemy moves needed next
    std::string result = {positions[pos_from][R], positions[pos_from][F], '-', positions[pos_to][R], positions[pos_to][F]};
    ++m_moveCounter;
    // the grammar is built for the side that moves next
    setGrammar();
    const auto t_end = std::chrono::high_resolution_clock::now();
    auto t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
    fprintf(stdout, "%s: Move '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", result.data(), "\033[0m", (int) t_ms);
    if (m_grammar.empty()) result.push_back('#');
    return result;
 }
 bool Chessboard::parseCommand(const std::string& command, Piece*& piece, char& pos_to) {
    auto color = Piece::Colors(m_moveCounter % 2);
    fprintf(stdout, "%s: Command to %s: '%s%.*s%s'\n", __func__, (color ? "Black" : "White"), "\033[1m", int(command.size()), command.data(), "\033[0m");
    if (command.empty()) return false;
    auto tokens = split(command, ' ');
    auto pos_from = INVALID_POS;
    auto type = Piece::Types::NUM_PIECES;
    if (tokens.size() == 1) {
        type = Piece::Types::Pawn;
        pos_to = strToPos(tokens.front());
    }
    else {
        pos_from = strToPos(tokens.front());
        if (pos_from == INVALID_POS) type = Piece::Types(strToType(tokens.front()));
        pos_to = strToPos(tokens.back());
    }
    if (pos_to == INVALID_POS) return false;
    if (pos_from == INVALID_POS) {
        if (type == Piece::Types::NUM_PIECES) return false;
        auto& pieces = color ? m_state->blacks : m_state->whites;
        for (auto& p : pieces) {
            if (p.type() == type && p.canReach(pos_to)) {
                pos_from = p.pos();
                break;
            }
        }
    }
    if (pos_from == INVALID_POS) return false;
    if (m_state->board[pos_from] == nullptr) return false;
    piece = m_state->board[pos_from];
    if (piece->color() != color) return false;
    return true;
 }
 void Chessboard::flagUpdates(char pos_from, char pos_to) {
    auto color = Piece::Colors(m_moveCounter % 2);
    auto& enemyPieces = color ? m_state->whites : m_state->blacks;
    auto& ownPieces = color ? m_state->blacks : m_state->whites;
    for (auto& p : enemyPieces) {
        if (p.movePattern(pos_to) || p.movePattern(pos_from)) {
            updatePins(p);
            p.invalidate();
        }
    }
    for (auto& p : ownPieces) {
        if (p.movePattern(pos_to) || p.movePattern(pos_from)) {
            updatePins(p);
            p.invalidate();
        }
    }
 }
 // Re-evaluates the pin (if any) that the sliding piece `piece` exerts on an enemy
 // piece standing between it and the enemy king. Pawns, knights and kings cannot pin
 // and are ignored.
 void Chessboard::updatePins(Piece& piece) {
    if (piece.type() == Piece::Pawn || piece.type() == Piece::Knight || piece.type() == Piece::King) return;
    auto& enemyPieces = piece.color() ? m_state->whites : m_state->blacks;
    auto& enemyPins = piece.color() ? m_state->whitePins : m_state->blackPins;
    auto& king = enemyPieces.k;
    // drop the pin previously created by this piece; the formerly pinned piece
    // has to recompute its moves
    auto it = std::find_if(enemyPins.begin(), enemyPins.end(), [&] (const Pin& pin) { return pin.pinner == &piece; });
    if (it != enemyPins.end()) {
        it->pinned->invalidate();
        enemyPins.erase(it);
    }
    // only relevant when the enemy king lies on one of this piece's lines
    if (piece.movePattern(king.pos())) {
        auto to = positions[king.pos()];
        auto from = piece.coord();
        Direction d = normalize({char(to[R] - from[R]), char(to[F] - from[F])});
        // first occupied square on the way to the king
        auto reached = traverse(piece.pos(), d, Find(m_state->board));
        auto foundPiece = m_state->board[reached];
        if (&king == foundPiece) {
            // check
            king.invalidate();
        }
        else if (foundPiece && foundPiece->color() != piece.color()) {
            // an enemy piece is in the way: it is pinned if the next occupied
            // square behind it is the king
            reached = traverse(reached, d, Find(m_state->board));
            if (&king == m_state->board[reached]) {
                enemyPins.push_back({d, &piece, foundPiece});
                foundPiece->invalidate();
            }
        }
    }
 }
 void Chessboard::detectChecks() {
    auto color = Piece::Colors(m_moveCounter % 2);
    auto& enemyPieces = color ? m_state->whites : m_state->blacks;
    auto& ownPieces = color ? m_state->blacks : m_state->whites;
    auto& king = enemyPieces.k;
    auto& pawnAttackLeft = color ? SW : NW;
    auto& pawnAttackRight = color ? SE : NE;
    for (auto& p : ownPieces) {
        if (!p.movePattern(king.pos())) continue;
        auto to = positions[king.pos()];
        auto from = p.coord();
        if (p.type() == Piece::Knight) {
            if (!m_inCheck) {
                m_allowedInCheck = { p.pos() };
            }
            else {
                m_allowedInCheck.clear();
            }
            m_inCheck = true;
        }
        else if (p.type() == Piece::Pawn) {
            Direction d {char(to[R] - from[R]), char(to[F] - from[F])};
            if (d == pawnAttackLeft || d == pawnAttackRight) {
                if (!m_inCheck) {
                    m_allowedInCheck = { p.pos() };
                }
                else {
                    m_allowedInCheck.clear();
                }
                m_inCheck = true;
            }
        }
        else {
            Direction d = normalize({char(to[R] - from[R]), char(to[F] - from[F])});
            std::set<char> tmp;
            auto pos = traverse(p.pos(), d, Add(m_state->board, tmp, king.color()));
            if (pos == king.pos()) {
                tmp.insert(p.pos());
                if (!m_inCheck) {
                    m_allowedInCheck = std::move(tmp);
                }
                else {
                    m_allowedInCheck.clear();
                }
                m_inCheck = true;
            }
        }
    }
 }
 // Executes the move of `piece` to `pos_to` if it is in the piece's allowed set and,
 // while in check, either the king moves or the destination answers the check.
 // A captured enemy piece is taken off the board. Returns false (changing nothing)
 // when the move is rejected; on success the check state is reset.
 bool Chessboard::move(Piece& piece, char pos_to) {
    const bool reachable = piece.allowed().count(pos_to) != 0;
    if (!reachable) return false;
    const bool mustAnswerCheck = m_inCheck && piece.type() != Piece::King;
    if (mustAnswerCheck && m_allowedInCheck.count(pos_to) == 0) return false;

    Piece* occupant = m_state->board[pos_to];
    if (occupant) {
        if (occupant->color() == piece.color()) return false;
        occupant->take();
    }
    m_state->board[piece.pos()] = nullptr;
    m_state->board[pos_to] = &piece;
    piece.setPos(pos_to);

    m_inCheck = false;
    m_allowedInCheck.clear();
    return true;
 }
--- a/examples/wchess/libwchess/Chessboard.h
+++ b/examples/wchess/libwchess/Chessboard.h
@ -0,0 +1,33 @@
 #pragma once
 #include <string>
 #include <set>
 #include <memory>
 // just basic validation
 // fixme: missing en passant, castling, promotion, etc.
 struct State;
 class Piece;
 // Chess position with basic move validation, driven by textual move commands.
 // After every accepted move it publishes a grammar describing the moves that are
 // currently available to the side to move.
 class Chessboard {
 public:
    Chessboard();
    ~Chessboard();
    // Executes `command` (e.g. "e2 to e4", "knight to f3", "e4") for the side to move.
    // Returns the move as "<from>-<to>", with '#' appended when the opponent has no
    // move left, or an empty string when the command was rejected.
    std::string process(const std::string& command);
    // Text rendering of the current position.
    std::string stringifyBoard();
    // Grammar for the next move; empty when the side to move has no move.
    const std::string& grammar() const { return m_grammar; }
    const std::string& prompt() const { return m_prompt; }
    // Sets the prompt that must precede a move and rebuilds the grammar.
    void setPrompt(const std::string& prompt);
 private:
    bool parseCommand(const std::string& command, Piece*& piece, char& pos_to);
    bool move(Piece& piece, char pos);
    void flagUpdates(char pos_from, char pos_to);
    void updatePins(Piece& piece);
    void detectChecks();
    void setGrammar();
    std::unique_ptr<State> m_state;
    // Destinations that non-king pieces may use while m_inCheck is set.
    std::set<char> m_allowedInCheck;
    bool m_inCheck = false;
    // Number of moves played so far; even means white is to move.
    int m_moveCounter = 0;
    std::string m_grammar;
    std::string m_prompt;
 };
--- a/examples/wchess/libwchess/WChess.cpp
+++ b/examples/wchess/libwchess/WChess.cpp
@ -0,0 +1,193 @@
 #include "WChess.h"
 #include "Chessboard.h"
 #include "grammar-parser.h"
 #include "common.h"
 #include <thread>
 // Stores the whisper context pointer, copies the inference parameters, the
 // callbacks and the settings, and creates a fresh Chessboard.
 // The context is only stored as a raw pointer; this class never frees it.
 WChess::WChess(whisper_context * ctx,
        const whisper_full_params & wparams,
        callbacks cb,
        settings s)
        : m_ctx(ctx)
        , m_wparams(wparams)
        , m_cb(cb)
        , m_settings(s)
        , m_board(new Chessboard())
 {}
 // Defaulted in this translation unit, where Chessboard.h has been included,
 // so the unique_ptr<Chessboard> member can be destroyed with a complete type.
 WChess::~WChess() = default;
 // Hands a processed move and its probability to the client, when a
 // set_move callback has been registered.
 void WChess::set_move(const std::string& moves, float prob) const {
    if (m_cb.set_move == nullptr) return;
    m_cb.set_move(moves, prob);
 }
 // Hands the current grammar text to the client, when a set_grammar callback
 // has been registered.
 void WChess::set_grammar(const std::string& grammar) const {
    if (m_cb.set_grammar == nullptr) return;
    m_cb.set_grammar(grammar);
 }
 // Asks the client for the next audio chunk. Reports false when no get_audio
 // callback is registered or when the callback itself returns false.
 bool WChess::get_audio(std::vector<float>& pcmf32) const {
    return m_cb.get_audio != nullptr && m_cb.get_audio(pcmf32);
 }
 // Textual rendering of the board, as produced by the Chessboard.
 std::string WChess::stringify_board() const {
    std::string rendered = m_board->stringifyBoard();
    return rendered;
 }
 // Copy of the grammar text currently held by the Chessboard.
 std::string WChess::get_grammar() const {
    const std::string& rules = m_board->grammar();
    return rules;
 }
 // Main loop: pulls audio chunks through the get_audio callback until it
 // returns false, transcribes each non-empty chunk under the board's current
 // grammar, forwards the recognized command to the Chessboard and reports the
 // result through the set_move / set_grammar callbacks. The loop also ends
 // once the board's grammar becomes empty.
 void WChess::run() {
    // have_prompt starts out true, so k_prompt is always empty and the
    // activation-prompt branches below are not entered as the code stands.
    bool have_prompt  = true;
    bool ask_prompt   = !have_prompt;
    float logprob_min  = 0.0f;
    float logprob_sum  = 0.0f;
    int n_tokens  = 0;
    std::vector<float> pcmf32_cur;
    std::vector<float> pcmf32_prompt;
    const std::string k_prompt = have_prompt ? "" : "rook to d4, f3";
    int64_t t_ms = 0;
    if (ask_prompt) {
        fprintf(stdout, "\n");
        fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
        fprintf(stdout, "\n");
        ask_prompt = false;
    }
    while (get_audio(pcmf32_cur)) {
        // An empty chunk means there is nothing to transcribe this round.
        if (!pcmf32_cur.empty()) {
            // fprintf(stdout, "%s: Processing ...\n", __func__);
            if (!have_prompt) {
                // Activation phase: the spoken text must match k_prompt closely
                // enough in both length and similarity.
                const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
                fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
                const float sim = similarity(txt, k_prompt);
                if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
                    fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
                    ask_prompt = true;
                } else {
                    fprintf(stdout, "\n");
                    fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
                    fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
                    fprintf(stdout, "\n");
                    // save the audio for the prompt
                    pcmf32_prompt = pcmf32_cur;
                    have_prompt = true;
                    m_board->setPrompt(k_prompt);
                }
            } else {
                // Command phase: prepend the saved prompt audio (if any) to the chunk.
                if (!pcmf32_prompt.empty()) pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
                // Left-pad short chunks with silence up to 1.2 seconds of samples.
                constexpr size_t MIN_SIZE = 1.2 * WHISPER_SAMPLE_RATE;
                if (MIN_SIZE > pcmf32_cur.size()) pcmf32_cur.insert(pcmf32_cur.begin(), MIN_SIZE - pcmf32_cur.size(), 0.0f);
                // fprintf(stdout, "%s: grammar rules:\n'%s'\n", __func__, m_board->grammar().c_str());
                // Re-parse the board's grammar for every command. The pointers
                // stored in m_wparams refer to the local grammar_rules vector,
                // so they are only valid until the end of this branch.
                auto grammar_parsed = grammar_parser::parse(m_board->grammar().c_str());
                auto grammar_rules  = grammar_parsed.c_rules();
                m_wparams.grammar_rules   = grammar_rules.data();
                m_wparams.n_grammar_rules = grammar_rules.size();
                // at() throws std::out_of_range if the grammar has no "move" rule.
                m_wparams.i_start_rule    = grammar_parsed.symbol_ids.at("move");
                auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
                // Lowest token log-probability converted to a percentage.
                const float p = 100.0f * std::exp(logprob_min);
                fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
                // find the prompt in the text
                float best_sim = 0.0f;
                size_t best_len = 0;
                for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
                    const auto prompt = txt.substr(0, n);
                    const float sim = similarity(prompt, k_prompt);
                    //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
                    if (sim > best_sim) {
                        best_sim = sim;
                        best_len = n;
                    }
                }
                fprintf(stdout, "%s:   DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
                // Everything after the best-matching prompt prefix is the command.
                std::string command = ::trim(txt.substr(best_len));
                fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                fprintf(stdout, "\n");
                if (!command.empty()) {
                    set_move(m_board->process(command), p);
                    set_grammar(m_board->grammar());
                }
                // An empty grammar means the board offers no further moves.
                if (m_board->grammar().empty()) {
                    fprintf(stdout, "%s: No more moves possible\n", __func__);
                    break;
                }
            }
        }
        if (ask_prompt) {
            fprintf(stdout, "\n");
            fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
            fprintf(stdout, "\n");
            ask_prompt = false;
        }
    }
 }
 // Runs whisper_full() on `pcmf32` with the stored parameters and returns the
 // concatenated text of all segments.
 //
 // Outputs (all reset on entry):
 //   logprob_min - smallest token log-probability seen
 //   logprob_sum - sum of the token log-probabilities
 //   n_tokens    - number of tokens visited
 //   t_ms        - elapsed time in milliseconds; only written on success
 //
 // An empty string is returned when whisper_full() fails or when a token with
 // a positive log-probability is encountered.
 std::string WChess::transcribe(
                const std::vector<float> & pcmf32,
                float & logprob_min,
                float & logprob_sum,
                int & n_tokens,
                int64_t & t_ms) {
    using hr_clock = std::chrono::high_resolution_clock;
    const hr_clock::time_point started = hr_clock::now();

    t_ms        = 0;
    n_tokens    = 0;
    logprob_sum = 0.0f;
    logprob_min = 0.0f;

    const int status = whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size());
    if (status != 0) {
        return std::string();
    }

    std::string transcript;
    const int segment_count = whisper_full_n_segments(m_ctx);
    for (int seg = 0; seg < segment_count; ++seg) {
        transcript += whisper_full_get_segment_text(m_ctx, seg);

        const int token_count = whisper_full_n_tokens(m_ctx, seg);
        for (int tok = 0; tok < token_count; ++tok) {
            const float plog = whisper_full_get_token_data(m_ctx, seg, tok).plog;
            // A positive log-probability invalidates the whole transcription.
            if (plog > 0.0f) {
                return std::string();
            }
            if (plog < logprob_min) {
                logprob_min = plog;
            }
            logprob_sum += plog;
            ++n_tokens;
        }
    }

    const auto elapsed = hr_clock::now() - started;
    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
    return transcript;
 }
--- a/examples/wchess/libwchess/WChess.h
+++ b/examples/wchess/libwchess/WChess.h
@ -0,0 +1,63 @@
 #pragma once
 #include "whisper.h"
 #include <string>
 #include <vector>
 #include <memory>
 // Only a pointer to Chessboard is held, so a forward declaration is enough here.
 class Chessboard;
 // Voice-driven chess front end: combines a whisper context, a Chessboard and
 // a set of client callbacks for audio input and move/grammar output.
 class WChess {
 public:
    // Callback signatures. CheckRunningCb and ClearAudioCb are declared but
    // not referenced by the callbacks struct below.
    using CheckRunningCb = bool (*)();
    using GetAudioCb = bool (*)(std::vector<float> &);
    using SetMovesCb = void (*)(const std::string &, float);
    using SetGrammarCb = void (*)(const std::string &);
    using ClearAudioCb = void (*)();
    // Client hooks; each may stay nullptr, in which case the matching
    // operation is skipped (get_audio then reports false).
    struct callbacks {
        GetAudioCb get_audio = nullptr;
        SetMovesCb set_move = nullptr;
        SetGrammarCb set_grammar = nullptr;
    };
    // Timing and threshold settings supplied by the client.
    // NOTE(review): the *_ms names suggest milliseconds, and none of these
    // fields appear to be read by the WChess implementation - confirm.
    struct settings {
        int32_t vad_ms     = 2000;
        int32_t prompt_ms  = 5000;
        int32_t command_ms = 4000;
        float vad_thold    = 0.2f;
        float freq_thold   = 100.0f;
        bool print_energy  = false;
    };
    // `ctx` is stored as a raw pointer and is not freed by this class.
    WChess(
        whisper_context * ctx,
        const whisper_full_params & wparams,
        callbacks cb,
        settings s
    );
    ~WChess();
    // Runs the listen/transcribe/move loop until audio input stops or the
    // board's grammar becomes empty.
    void run();
    std::string stringify_board() const;
    std::string get_grammar() const;
 private:
    // Thin wrappers that invoke the corresponding callback when it is set.
    bool get_audio(std::vector<float>& pcmf32) const;
    void set_move(const std::string& moves, float prob) const;
    void set_grammar(const std::string& grammar) const;
    // Transcribes `pcmf32`; the remaining arguments are outputs.
    std::string transcribe(
                    const std::vector<float> & pcmf32,
                    float & logprob_min,
                    float & logprob_sum,
                    int & n_tokens,
                    int64_t & t_ms);
    whisper_context * m_ctx;
    whisper_full_params m_wparams;
    const callbacks m_cb;
    const settings m_settings;
    std::unique_ptr<Chessboard> m_board;
 };
--- a/examples/wchess/libwchess/test-chessboard.cpp
+++ b/examples/wchess/libwchess/test-chessboard.cpp
@ -0,0 +1,117 @@
 #include "Chessboard.h"
 // Minimal test assertion: when `x` evaluates to false, prints the file, line
 // and the failing expression text to stderr, flushes it and terminates the
 // process with exit code 1. The do/while(0) wrapper makes the macro behave
 // like a single statement.
 #define ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
            fflush(stderr); \
            exit(1); \
        } \
    } while (0)
 // Scenario tests for Chessboard::process(). Every scenario starts from a
 // fresh board. An accepted command is expected to come back as "from-to";
 // a rejected command is expected to come back as an empty string.
 int main() {
    // Scenario 1: mixes the accepted command forms (piece name, bare target
    // square, "from to"); "c4" and the final "f3" must be rejected.
    {
        Chessboard chess;
        ASSERT(chess.process("pawn to d4") == "d2-d4");
        ASSERT(chess.process("e5") == "e7-e5");
        ASSERT(chess.process("c1 h6") == "c1-h6");
        ASSERT(chess.process("queen h4") == "d8-h4");
        ASSERT(chess.process("bishop to g5") == "h6-g5");
        ASSERT(chess.process("bishop to b4") == "f8-b4");
        ASSERT(chess.process("c4") == "");
        ASSERT(chess.process("knight c3") == "b1-c3");
        ASSERT(chess.process("knight c6") == "b8-c6");
        ASSERT(chess.process("f3") == "");
    }
    // Scenario 2: both queens come out; the pawn moves "f5" and "g5" are
    // rejected at first and accepted later, once the position has changed.
    {
        Chessboard chess;
        ASSERT(chess.process("d4") == "d2-d4");
        ASSERT(chess.process("e5") == "e7-e5");
        ASSERT(chess.process("e4") == "e2-e4");
        ASSERT(chess.process("queen h4") == "d8-h4");
        ASSERT(chess.process("queen h5") == "d1-h5");
        ASSERT(chess.process("f5") == "");
        ASSERT(chess.process("g6") == "g7-g6");
        ASSERT(chess.process("knight e2") == "g1-e2");
        ASSERT(chess.process("f5") == "f7-f5");
        ASSERT(chess.process("knight g3") == "e2-g3");
        ASSERT(chess.process("g5") == "");
        ASSERT(chess.process("king e7") == "e8-e7");
        ASSERT(chess.process("f4") == "f2-f4");
        ASSERT(chess.process("g5") == "g6-g5");
    }
    // Scenario 3: pawn advances and a pawn capture (e6-f7) followed by king
    // walks; several pawn and king commands must be rejected along the way.
    {
        Chessboard chess;
        ASSERT(chess.process("e4") == "e2-e4");
        ASSERT(chess.process("c5") == "c7-c5");
        ASSERT(chess.process("e5") == "e4-e5");
        ASSERT(chess.process("c4") == "c5-c4");
        ASSERT(chess.process("e6") == "e5-e6");
        ASSERT(chess.process("c3") == "c4-c3");
        ASSERT(chess.process("e7") == "");
        ASSERT(chess.process("f7") == "e6-f7");
        ASSERT(chess.process("d2") == "");
        ASSERT(chess.process("king to f7") == "e8-f7");
        ASSERT(chess.process("f4") == "f2-f4");
        ASSERT(chess.process("d2") == "c3-d2");
        ASSERT(chess.process("f5") == "");
        ASSERT(chess.process("king to e2") == "e1-e2");
        ASSERT(chess.process("king to g6") == "f7-g6");
        ASSERT(chess.process("f5") == "f4-f5");
        ASSERT(chess.process("e6") == "");
        ASSERT(chess.process("king to h5") == "g6-h5");
        ASSERT(chess.process("g4") == "g2-g4");
        ASSERT(chess.process("king to g5") == "h5-g5");
        ASSERT(chess.process("h4") == "h2-h4");
        ASSERT(chess.process("king to h5") == "");
        ASSERT(chess.process("king to g6") == "");
        ASSERT(chess.process("king to h6") == "g5-h6");
        ASSERT(chess.process("bishop to d2") == "c1-d2");
        ASSERT(chess.process("king to g5") == "");
        ASSERT(chess.process("g5") == "g7-g5");
    }
    // Scenario 4: the queen move ends the game - it is reported with a
    // trailing '#', the next command is rejected and the grammar is empty.
    {
        Chessboard chess;
        ASSERT(chess.process("f4") == "f2-f4");
        ASSERT(chess.process("e5") == "e7-e5");
        ASSERT(chess.process("g4") == "g2-g4");
        ASSERT(chess.process("queen to h4") == "d8-h4#");
        ASSERT(chess.process("knight f3") == "");
        ASSERT(chess.grammar().empty());
    }
    // Scenario 5: same queen move after two extra moves - no '#' is reported
    // and the grammar stays non-empty.
    {
        Chessboard chess;
        ASSERT(chess.process("f4") == "f2-f4");
        ASSERT(chess.process("e5") == "e7-e5");
        ASSERT(chess.process("g4") == "g2-g4");
        ASSERT(chess.process("d5") == "d7-d5");
        ASSERT(chess.process("g1 f3") == "g1-f3");
        ASSERT(chess.process("queen to h4") == "d8-h4");
        ASSERT(!chess.grammar().empty());
    }
    // Scenario 6: knight maneuvers by both sides; "knight d4" is rejected
    // once and the two final king commands must be rejected.
    {
        Chessboard chess;
        ASSERT(chess.process("knight c3") == "b1-c3");
        ASSERT(chess.process("knight c6") == "b8-c6");
        ASSERT(chess.process("knight b5") == "c3-b5");
        ASSERT(chess.process("knight f6") == "g8-f6");
        ASSERT(chess.process("knight d6") == "b5-d6");
        ASSERT(chess.process("knight d4") == "");
        ASSERT(chess.process("d6") == "c7-d6");
        ASSERT(chess.process("e4") == "e2-e4");
        ASSERT(chess.process("knight d4") == "c6-d4");
        ASSERT(chess.process("d3") == "d2-d3");
        ASSERT(chess.process("knight e4") == "f6-e4");
        ASSERT(chess.process("king to e2") == "");
        ASSERT(chess.process("king to d2") == "");
    }
 }
--- a/examples/wchess/wchess.cmd/CMakeLists.txt
+++ b/examples/wchess/wchess.cmd/CMakeLists.txt
@ -0,0 +1,8 @@
 # The command-line example is only built when WHISPER_SDL2 is enabled,
 # since it links against common-sdl.
 if (WHISPER_SDL2)
    set(TARGET wchess)
    add_executable(${TARGET} wchess.cmd.cpp)
    include(DefaultTargetOptions)
    # wchess-core provides the WChess/Chessboard logic; common-sdl the audio capture.
    target_link_libraries(${TARGET} PRIVATE wchess-core common-sdl ${CMAKE_THREAD_LIBS_INIT})
 endif ()
--- a/examples/wchess/wchess.cmd/wchess.cmd.cpp
+++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp
@ -0,0 +1,247 @@
 // Command line voice assisted chess
 //
 // Speak chess move commands to the microphone.
 // The moves will be translated to chessboard positions.
 //
 //
 #include "WChess.h"
 #include "common-sdl.h"
 #include <iostream>
 #include <memory>
 #include <thread>
 // command-line parameters
 // Defaults are shown by whisper_print_usage() and overridden by
 // whisper_params_parse().
 // NOTE(review): in this file main() only consumes language, model, use_gpu,
 // capture_id, context, prompt_ms, command_ms, vad_thold, freq_thold and
 // print_energy; the remaining fields are parsed but not applied - confirm
 // whether that is intended.
 struct whisper_params {
    // At most 4 threads, fewer when the hardware reports less concurrency.
    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency());
    int32_t prompt_ms  = 5000;
    int32_t command_ms = 8000;
    int32_t capture_id = -1;
    int32_t max_tokens = 32;
    int32_t audio_ctx  = 0;
    float vad_thold  = 0.6f;
    float freq_thold = 100.0f;
    float grammar_penalty = 100.0f;
    bool speed_up      = false;
    bool translate     = false;
    bool print_special = false;
    bool print_energy  = false;
    bool no_timestamps = true;
    bool use_gpu       = true;
    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
    std::string fname_out;
    std::string commands;
    std::string prompt;
    std::string context;
    // No command-line option in this file sets `grammar`.
    std::string grammar;
 };
 // Prints the option summary to stderr. The bracketed column shows the value
 // currently stored in `params`, i.e. the defaults unless options were
 // already parsed. argv[0] is used as the program name; argc is unused.
 void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h,         --help           [default] show this help message and exit\n");
    fprintf(stderr, "  -t N,       --threads N      [%-7d] number of threads to use during computation\n", params.n_threads);
    fprintf(stderr, "  -pms N,     --prompt-ms N    [%-7d] prompt duration in milliseconds\n",             params.prompt_ms);
    fprintf(stderr, "  -cms N,     --command-ms N   [%-7d] command duration in milliseconds\n",            params.command_ms);
    fprintf(stderr, "  -c ID,      --capture ID     [%-7d] capture device ID\n",                           params.capture_id);
    fprintf(stderr, "  -mt N,      --max-tokens N   [%-7d] maximum number of tokens per audio chunk\n",    params.max_tokens);
    fprintf(stderr, "  -ac N,      --audio-ctx N    [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
    fprintf(stderr, "  -vth N,     --vad-thold N    [%-7.2f] voice activity detection threshold\n",        params.vad_thold);
    fprintf(stderr, "  -fth N,     --freq-thold N   [%-7.2f] high-pass frequency cutoff\n",                params.freq_thold);
    fprintf(stderr, "  -su,        --speed-up       [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
    fprintf(stderr, "  -tr,        --translate      [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,        --print-special  [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,        --print-energy   [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
    // --no-gpu is the inverse of use_gpu, hence the swapped strings.
    fprintf(stderr, "  -ng,        --no-gpu         [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "  -l LANG,    --language LANG  [%-7s] spoken language\n",                             params.language.c_str());
    fprintf(stderr, "  -m FNAME,   --model FNAME    [%-7s] model path\n",                                  params.model.c_str());
    fprintf(stderr, "  -f FNAME,   --file FNAME     [%-7s] text output file name\n",                       params.fname_out.c_str());
    fprintf(stderr, "  -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n",             params.commands.c_str());
    fprintf(stderr, "  -p,         --prompt         [%-7s] the required activation prompt\n",              params.prompt.c_str());
    fprintf(stderr, "  -ctx,       --context        [%-7s] sample text to help the transcription\n",       params.context.c_str());
    fprintf(stderr, "  --grammar-penalty N          [%-7.1f] scales down logits of nongrammar tokens\n",   params.grammar_penalty);
    fprintf(stderr, "\n");
 }
 // Parses the command line into `params`.
 //
 // -h/--help prints the usage text and exits with status 0, as does an
 // unknown argument (after an error message). An option that requires a value
 // but is given as the last argument prints an error plus the usage text and
 // exits with status 1.
 //
 // Returns true when every argument was consumed. Numeric values are
 // converted with std::stoi/std::stof, which throw on malformed input.
 bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    // Returns the value following the option at argv[i] and advances i past
    // it. Without this check a trailing option would read argv[argc], which is
    // a null pointer and must never reach std::stoi/std::stof/std::string.
    const auto value = [&](int & i) -> const char * {
        if (i + 1 >= argc) {
            fprintf(stderr, "error: missing value for argument: %s\n", argv[i]);
            whisper_print_usage(argc, argv, params);
            exit(1);
        }
        return argv[++i];
    };
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        if (arg == "-h" || arg == "--help") {
            whisper_print_usage(argc, argv, params);
            exit(0);
        }
        else if (arg == "-t"   || arg == "--threads")       { params.n_threads     = std::stoi(value(i)); }
        else if (arg == "-pms" || arg == "--prompt-ms")     { params.prompt_ms     = std::stoi(value(i)); }
        else if (arg == "-cms" || arg == "--command-ms")    { params.command_ms    = std::stoi(value(i)); }
        else if (arg == "-c"   || arg == "--capture")       { params.capture_id    = std::stoi(value(i)); }
        else if (arg == "-mt"  || arg == "--max-tokens")    { params.max_tokens    = std::stoi(value(i)); }
        else if (arg == "-ac"  || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(value(i)); }
        else if (arg == "-vth" || arg == "--vad-thold")     { params.vad_thold     = std::stof(value(i)); }
        else if (arg == "-fth" || arg == "--freq-thold")    { params.freq_thold    = std::stof(value(i)); }
        else if (arg == "-su"  || arg == "--speed-up")      { params.speed_up      = true; }
        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
        else if (arg == "-ng"  || arg == "--no-gpu")        { params.use_gpu       = false; }
        else if (arg == "-l"   || arg == "--language")      { params.language      = value(i); }
        else if (arg == "-m"   || arg == "--model")         { params.model         = value(i); }
        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = value(i); }
        else if (arg == "-cmd" || arg == "--commands")      { params.commands      = value(i); }
        else if (arg == "-p"   || arg == "--prompt")        { params.prompt        = value(i); }
        else if (arg == "-ctx" || arg == "--context")       { params.context       = value(i); }
        else if (                 arg == "--grammar-penalty") { params.grammar_penalty = std::stof(value(i)); }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
            exit(0);
        }
    }
    return true;
 }
 // The game instance driven by this program; created in main().
 std::unique_ptr<WChess> g_wchess;
 // Number of accepted set_move() calls, including the initial "start" call
 // made by main() before the first real move.
 int g_moveCount = 0;
 // Callback invoked with the result of each processed command: prints the
 // move (or a rejection notice for an empty string), the board and whose turn
 // it is. The probability argument is ignored.
 void set_move(const std::string & move, float) {
    if (!move.empty()) {
        g_moveCount++;
        fprintf(stdout, "Move: %s\n\n", move.c_str());
    }
    else fprintf(stdout, "Move rejected\n\n");
    fprintf(stdout, "%s\n", g_wchess->stringify_board().c_str());
    // The counter is 1 after the initial "start" call, so an odd count means
    // White is to move and an even count means Black is. Testing the counter
    // for non-zero instead would report "White's turn" after every move.
    fprintf(stdout, "%s\n", (g_moveCount % 2) ? "White's turn" : "Black's turn");
 }
 // Audio capture object, constructed with 30*1000 (presumably a 30 s buffer
 // length in milliseconds - confirm against audio_async).
 audio_async g_audio(30*1000);
 // True while a recording started with 'l' has not been stopped yet.
 bool g_listening = false;
 // Samples collected by the most recently stopped recording.
 std::vector<float> g_pcmf32;
 // Reads one command word from stdin and updates the capture state:
 //   'q...'         - quit; returns false
 //   'l...'         - start listening (clears old samples and resumes capture)
 //   anything else  - stop listening, fetch the captured samples into g_pcmf32
 //                    and pause capture
 // Returns false when the user quits or stdin can no longer be read, true
 // otherwise.
 bool read_input() {
    std::string input;
    fprintf(stdout, "[(l)isten/(p)ause/(q)uit]: ");
    // A failed read (EOF or stream error) leaves `input` empty. Treat it as a
    // request to quit; otherwise the caller would poll a dead stream forever.
    if (!(std::cin >> input)) {
        fprintf(stdout, "\n");
        fprintf(stdout, "Quitting\n");
        return false;
    }
    fprintf(stdout, "\n");
    if (input[0] == 'q') {
        fprintf(stdout, "Quitting\n");
        return false;
    }
    if (input[0] == 'l') {
        if (!g_listening) {
            fprintf(stdout, "Listening\n");
            g_listening = true;
            g_pcmf32.clear();
            g_audio.resume();
            g_audio.clear();
        }
        else fprintf(stdout, "Still listening\n");
        return true;
    }
    if (g_listening) {
        g_listening = false;
        g_audio.get(0, g_pcmf32);
        g_audio.pause();
        fprintf(stdout, "Processing\n");
    }
    else fprintf(stdout, "Not listening\n");
    return true;
 }
 // Audio callback handed to WChess: waits for one user command and then
 // transfers the captured samples (if any) into `pcmf32_cur`. When nothing
 // was captured the output vector is emptied. Returns false once the user
 // asked to quit; in that case `pcmf32_cur` is left untouched.
 bool get_audio(std::vector<float> & pcmf32_cur) {
    const bool keep_running = read_input();
    if (keep_running) {
        if (g_pcmf32.empty()) {
            pcmf32_cur.clear();
        } else {
            pcmf32_cur = std::move(g_pcmf32);
        }
    }
    return keep_running;
 }
 int main(int argc, char ** argv) {
    whisper_params params;
    if (whisper_params_parse(argc, argv, params) == false) {
        return 1;
    }
    if (whisper_lang_id(params.language.c_str()) == -1) {
        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
        whisper_print_usage(argc, argv, params);
        exit(0);
    }
    // whisper init
    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    if (!ctx) {
        fprintf(stderr, "%s: whisper_init_from_file_with_params() failed!\n", __func__);
        return 1;
    }
    // init audio
    if (!g_audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
        fprintf(stderr, "%s: audio.init() failed!\n", __func__);
        return 1;
    }
    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
    wparams.offset_ms        = 0;
    wparams.translate        = false;
    wparams.no_context       = true;
    wparams.single_segment   = true;
    wparams.print_realtime   = false;
    wparams.print_progress   = false;
    wparams.print_timestamps = true;
    wparams.print_special    = false;
    wparams.no_timestamps    = true;
    wparams.max_tokens       = 32;
    wparams.audio_ctx        = 768; // partial encoder context for better performance
    wparams.temperature     = 0.0f;
    wparams.temperature_inc = 2.0f;
    wparams.greedy.best_of  = 1;
    wparams.beam_search.beam_size = 1;
    wparams.language         = "en";
    wparams.grammar_penalty = 100.0;
    wparams.initial_prompt = params.context.data();
    WChess::callbacks cb;
    cb.get_audio = get_audio;
    cb.set_move = set_move;
    WChess::settings s;
    s.vad_ms = 2000;
    s.prompt_ms = params.prompt_ms;
    s.command_ms = params.command_ms;
    s.vad_thold = params.vad_thold;
    s.freq_thold = params.freq_thold;
    s.print_energy = params.print_energy;
    g_wchess.reset(new WChess(ctx, wparams, cb, s));
    set_move("start", 0);
    g_wchess->run();
    whisper_print_timings(ctx);
    whisper_free(ctx);
    return 0;
 }
--- a/examples/wchess/wchess.wasm/CMakeLists.txt
+++ b/examples/wchess/wchess.wasm/CMakeLists.txt
@ -0,0 +1,51 @@
 # WebAssembly build of the voice chess example.
 set(TARGET wchess.wasm)
 add_executable(${TARGET}
    wchess.wasm.cpp
    )
 include(DefaultTargetOptions)
 target_link_libraries(${TARGET} PRIVATE
    common
    wchess-core
    )
 unset(EXTRA_FLAGS)
 # Optional single-file mode: pass SINGLE_FILE=1 to the linker and copy the
 # generated JavaScript to js/chess.js in the output directory after the build.
 if (WHISPER_WASM_SINGLE_FILE)
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside chess.js")
    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/${TARGET}.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/chess.js
        )
 endif()
 # Linker flags: embind bindings, pthreads with a pool of 8 threads, 1024MB of
 # memory, the virtual filesystem and the runtime methods exported to JavaScript.
 set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s INITIAL_MEMORY=1024MB \
    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")
 # Ship the bundled chessboardjs assets and jQuery next to the build output.
 add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        ${CMAKE_CURRENT_SOURCE_DIR}/chessboardjs-1.0.0
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_CURRENT_SOURCE_DIR}/jquery-3.7.1.min.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/
    )
 # Generate the page and the shared helper script; @ONLY restricts
 # substitution to @VAR@ references.
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
 configure_file(${CMAKE_SOURCE_DIR}/examples/helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/helpers.js @ONLY)
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.css
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.css
@ -0,0 +1,54 @@
 /*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
 /* Vendored third-party stylesheet; the hashed suffixes are part of the class names. */
 /* Clears the floats used by the squares. */
 .clearfix-7da63 {
  clear: both;
 }
 /* Board frame; content-box keeps the border outside the declared size. */
 .board-b72b1 {
  border: 2px solid #404040;
  box-sizing: content-box;
 }
 /* A single square: floated, and the positioning context for its notation. */
 .square-55d63 {
  float: left;
  position: relative;
  /* disable any native browser highlighting */
  -webkit-touch-callout: none;
    -webkit-user-select: none;
     -khtml-user-select: none;
       -moz-user-select: none;
        -ms-user-select: none;
            user-select: none;
 }
 /* Light and dark squares use the same two colors, swapped. */
 .white-1e1d7 {
  background-color: #f0d9b5;
  color: #b58863;
 }
 .black-3c85d {
  background-color: #b58863;
  color: #f0d9b5;
 }
 /* Inset yellow glow for highlighted squares. */
 .highlight1-32417, .highlight2-9c5d2 {
  box-shadow: inset 0 0 3px 3px yellow;
 }
 /* Coordinate labels, absolutely positioned inside a square. */
 .notation-322f9 {
  cursor: default;
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  position: absolute;
 }
 /* Label anchored to the bottom-right corner. */
 .alpha-d2270 {
  bottom: 1px;
  right: 3px;
 }
 /* Label anchored to the top-left corner. */
 .numeric-fc462 {
  top: 2px;
  left: 2px;
 }
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.min.css
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.min.css
@ -0,0 +1,2 @@
 /*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
 .clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.js
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.js
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.min.js
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.min.js
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/CHANGELOG.md
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/CHANGELOG.md
@ -0,0 +1,32 @@
 # chessboard.js Change Log
 All notable changes to this project will be documented in this file.
 ## [1.0.0] - 2019-06-11
 - Orientation methods now return current orientation. [Issue #64]
 - Drop support for IE8
 - Do not check for `window.JSON` (Error #1004)
 - Rename `ChessBoard` to `Chessboard` (`ChessBoard` is still supported, however)
 - id query selectors are now supported as the first argument to `Chessboard()`
 - Remove Error #1002
 - Format code according to [StandardJS]
 - Bump minimum jQuery version to 1.8.3
 - Throttle piece drag functions
 ## [0.3.0] - 2013-08-10
 - Added `appearSpeed` animation config property
 - Added `onSnapbackEnd` event
 - Added `onMoveEnd` event
 ## [0.2.0] - 2013-08-05
 - Added `onMouseoverSquare` and `onMouseoutSquare` events
 - Added `onSnapEnd` event
 - Added square code as CSS class on the squares
 - Added [chess.js] integration examples
 ## [0.1.0] - 2013-05-21
 - Initial release
 [chess.js]:https://github.com/jhlywa/chess.js
 [Issue #64]:https://github.com/oakmac/chessboardjs/issues/64
 [StandardJS]:https://standardjs.com/
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/LICENSE.md
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/LICENSE.md
@ -0,0 +1,20 @@
 Copyright 2019 Chris Oakman
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be
 included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/README.md
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/README.md
@ -0,0 +1,82 @@
 # chessboard.js
 chessboard.js is a JavaScript chessboard component. It depends on [jQuery].
 Please see [chessboardjs.com] for documentation and examples.
 ## What is chessboard.js?
 chessboard.js is a JavaScript chessboard component with a flexible "just a
 board" API.
 chessboard.js is a standalone JavaScript Chess Board. It is designed to be "just
 a board" and expose a powerful API so that it can be used in different ways.
 Here's a non-exhaustive list of things you can do with chessboard.js:
 - Use chessboard.js to show game positions alongside your expert commentary.
 - Use chessboard.js to have a tactics website where users have to guess the best
  move.
 - Integrate chessboard.js and [chess.js] with a PGN database and allow people to
  search and playback games (see [Example 5000])
 - Build a chess server and have users play their games out using the
  chessboard.js board.
 chessboard.js is flexible enough to handle any of these situations with relative
 ease.
 ## What can chessboard.js **not** do?
 The scope of chessboard.js is limited to "just a board." This is intentional and
 makes chessboard.js flexible for handling a multitude of chess-related problems.
 This is a common source of confusion for new users. [remove?]
 Specifically, chessboard.js does not understand anything about how the game of
 chess is played: how a knight moves, whose turn it is, whether White is in check, etc.
 Fortunately, the powerful [chess.js] library deals with exactly this sort of
 problem domain and plays nicely with chessboard.js's flexible API. Some examples
 of chessboard.js combined with chess.js: 5000, 5001, 5002
 Please see the powerful [chess.js] library for an API to deal with these sorts
 of questions.
 This logic is distinct from the logic of the board. Please see the powerful
 [chess.js] library for this aspect of your application.
 Here is a list of things that chessboard.js is **not**:
 - A chess engine
 - A legal move validator
 - A PGN parser
 chessboard.js is designed to work well with any of those things, but the idea
 behind chessboard.js is that the logic that controls the board should be
 independent of those other problems.
 ## Docs and Examples
 - Docs - <http://chessboardjs.com/docs>
 - Examples - <http://chessboardjs.com/examples>
 ## Developer Tools
 ```sh
 # create a build in the build/ directory
 npm run build
 # re-build the website
 npm run website
 ```
 ## License
 [MIT License](LICENSE.md)
 [jQuery]:https://jquery.com/
 [chessboardjs.com]:http://chessboardjs.com
 [chess.js]:https://github.com/jhlywa/chess.js
 [Example 5000]:http://chessboardjs.com/examples#5000
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/package.json
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/package.json
@ -0,0 +1,29 @@
 {
  "author": "Chris Oakman <chris@oakmac.com> (http://chrisoakman.com/)",
  "name": "@chrisoakman/chessboardjs",
  "description": "JavaScript chessboard widget",
  "homepage": "https://chessboardjs.com",
  "license": "MIT",
  "version": "1.0.0",
  "repository": {
    "type": "git",
    "url": "git://github.com/oakmac/chessboardjs.git"
  },
  "files": ["dist/"],
  "dependencies": {
    "jquery": ">=3.4.1"
  },
  "devDependencies": {
    "csso": "3.5.1",
    "fs-plus": "3.1.1",
    "kidif": "1.1.0",
    "mustache": "2.3.0",
    "standard": "10.0.2",
    "uglify-js": "3.6.0"
  },
  "scripts": {
    "build": "standard lib/chessboard.js && node scripts/build.js",
    "standard": "standard --fix lib/*.js website/js/*.js",
    "website": "node scripts/website.js"
  }
 }
--- a/examples/wchess/wchess.wasm/index-tmpl.html
+++ b/examples/wchess/wchess.wasm/index-tmpl.html
@ -0,0 +1,499 @@
 <!doctype html>
 <html lang="en-us">
    <head>
        <title>wchess : voice-controlled chess using Whisper + WebAssembly</title>
        <script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
        <meta name="viewport" content="width=device-width, initial-scale=0.7, maximum-scale=1, minimum-scale=0.7, user-scalable=no"/>
        <meta name="apple-mobile-web-app-capable" content="yes" />
        <style>
            #output {
                width: 100%;
                height: 100%;
                margin: 0 auto;
                margin-top: 10px;
                border-left: 0px;
                border-right: 0px;
                padding-left: 0px;
                padding-right: 0px;
                display: block;
                background-color: black;
                color: white;
                font-size: 10px;
                font-family: 'Lucida Console', Monaco, monospace;
                outline: none;
                white-space: pre;
                overflow-wrap: normal;
                overflow-x: scroll;
            }
            .button {
                background-color: #000000;
                color: #FFFFFF;
                padding: 20px;
                border-radius: 10px;
                -moz-border-radius: 10px;
                -webkit-border-radius: 10px;
                margin:10px;
                width:  100px;
                height:  50px;
                -webkit-touch-callout: none; /* Safari */
                -webkit-user-select: none; /* Chrome */
                -moz-user-select: none; /* Firefox */
                -ms-user-select: none; /* Internet Explorer/Edge */
                user-select: none;
            }
            button[disabled]{
                background-color: #cccccc;
                color: #666666;
                padding: 20px;
                border-radius: 10px;
                -moz-border-radius: 10px;
                -webkit-border-radius: 10px;
                margin:10px;
                width: 100px;
            }
            .center {
                display: flex;
                justify-content: center;
                align-items: center;
                width: 500px;
            }
            #description {
                width: 500px;
            }
        </style>
        <link rel="stylesheet" href="css/chessboard-1.0.0.min.css" integrity="sha384-q94+BZtLrkL1/ohfjR8c6L+A6qzNH9R2hBLwyoAfu3i/WCvQjzL2RQJ3uNHDISdU" crossorigin="anonymous">
    </head>
    <body>
        <div id="main-container">
            <div id="description">
                <b>wchess : voice-controlled chess using Whisper + WebAssembly</b>
                <br><br>
                This is a demonstration of using Whisper to recognize voice commands in the browser.
                <br><br>
                Usage:<br>
                <ul>
                    <li>Select a Whisper model</li>
                    <li>Accept the microphone permission request if prompted</li>
                    <li>Hold the button and say a chess move (e.g. "Knight to c3")</li>
                    <li>Release the button and wait for the move to be recognized</li>
                    <li>Repeat</li>
                </ul>
                Examples:<br>
                <ul>
                    <li><b>"d4"</b></li>
                    <li><b>"e2 e4"</b></li>
                    <li><b>"Knight f3"</b></li>
                    <li><b>"Bishop to b5"</b></li>
                </ul>
                Features:<br>
                <ul>
                    <li>Model quantization for reduced memory footprint (~42MB)</li>
                    <li><a href="https://github.com/ggerganov/whisper.cpp/pull/1229">Grammar-based sampling</a> for improved recognition accuracy</li>
                </ul>
                <b>
                Note that not all chess moves are supported. For example, castling and pawn promotion
                currently do not work, but can be easily implemented. There could also be some bugs in
                the move handling logic in general. The main reason for that is to keep the implementation
                simple. The assumption is that a real application would already have a proper move
                validation logic in place.<br><br>
                The main purpose of this example is to demonstrate the capabilities of whisper.cpp and
                its application in the browser for voice recognition locally on your device.
                </b>
                <br><br>
                You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/wchess">GitHub</a>.
                <br><br>
                <b>More examples:</b>
                    <a href="https://whisper.ggerganov.com/">main</a> |
                    <a href="https://whisper.ggerganov.com/bench">bench</a> |
                    <a href="https://whisper.ggerganov.com/stream">stream</a> |
                    <a href="https://whisper.ggerganov.com/command">command</a> |
                    <a href="https://whisper.ggerganov.com/talk">talk</a> |
                <br><br>
            </div>
            <hr>
            <div id="model-whisper">
                Whisper model: <span id="model-whisper-status"></span>
                <button id="fetch-whisper-tiny-en" onclick="loadWhisper()">tiny.en (Q8_0, 42 MB)</button>
                <span id="fetch-whisper-progress"></span>
                <br><br>
                <button id="clear" onclick="clearCache()">Clear browser cache</button>
                <!--
                    <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
                -->
            </div>
            <div id="game">
                <br>
                <div id="chessboard" style="width: 500px"></div>
                <script src="js/jquery-3.7.1.min.js"></script>
                <script src="js/chessboard-1.0.0.min.js"></script>
                <script>
                    var board = Chessboard('chessboard', 'start')
                    var move_count = 0;
                </script>
                <br>
                <div id="state">
                    Status: <b><span id="state-status">select model</span></b>
                    <div id="input" class="center">
                        <button id="toggler" class="button" onselectstart="return false" style="display: none">Hold</button>
                    </div>
                    <pre id="state-grammar">[The grammar will be displayed here]</pre>
                    <pre id="state-moves">[The moves will be displayed here]</pre>
                </div>
            </div>
            <hr>
            Debug output:
            <textarea id="output" rows="20"></textarea>
            <br>
            <b>Troubleshooting</b>
            <br><br>
            The page does some heavy computations, so make sure:
            <ul>
                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
            </ul>
            <div class="cell-version">
                <span>
                    |
                    Build time: <span class="nav-link">@GIT_DATE@</span> |
                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/wchess">Source Code</a> |
                </span>
            </div>
        </div>
        <script type="text/javascript" src="js/helpers.js"></script>
        <script type='text/javascript'>
            // web audio context
            var context = null;
            // the command instance
            var instance = null;
            // model name
            var model_whisper = null;
            var model_file = null;
            var module_ready = null;
            // Emscripten module configuration: all runtime output is routed to the
            // debug textarea, and whisper initialization is attempted as soon as the
            // runtime reports that it has finished starting up.
            var Module = {
                print: printTextarea,
                printErr: printTextarea,
                setStatus(text) {
                    printTextarea('js: ' + text);
                },
                monitorRunDependencies(left) {
                    // intentionally empty
                },
                preRun() {
                    printTextarea('js: Preparing ...');
                },
                postRun() {
                    printTextarea('js: Module initialized successfully!');
                    // the model may already be stored; initInstance() checks both conditions
                    module_ready = true;
                    initInstance();
                },
            };
            // Create the whisper instance once both the WASM module and the model file
            // are available. Safe to call repeatedly: it does nothing until both
            // prerequisites are met, and nothing once an instance exists.
            function initInstance() {
                const prerequisitesMet = module_ready && model_file && !instance;
                if (!prerequisitesMet) {
                    return;
                }

                instance = Module.init(model_file);

                if (!instance) {
                    printTextarea("js: failed to initialize whisper");
                    return;
                }

                setStatus('Ready');
                printTextarea("js: whisper initialized, instance: " + instance);
            }
            // Show `text` in the "Status:" field of the page (assigned as HTML).
            function setStatus(text) {
                const statusEl = document.getElementById('state-status');
                statusEl.innerHTML = text;
            }
            //
            // fetch models
            //
            let dbVersion = 1
            let dbName    = 'whisper.ggerganov.com';
            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
            // Write the downloaded model bytes into the WASM filesystem under `fname`,
            // report the result, remember the file name and try to start whisper.
            //   fname - name of the file to create in the root of the WASM filesystem
            //   buf   - model data; its `length` is reported in the debug output
            function storeFS(fname, buf) {
                // replace any previous copy of the file; a failed unlink is not an error
                try {
                    Module.FS_unlink(fname);
                } catch (ignored) {
                    // ignore
                }

                Module.FS_createDataFile("/", fname, buf, true, true);
                printTextarea(`storeFS: stored model: ${fname} size: ${buf.length}`);

                const statusEl = document.getElementById('model-whisper-status');
                statusEl.innerHTML = `loaded "${model_whisper}"!`;

                model_file = fname;
                initInstance();
            }
            // Start downloading the Whisper model and, in parallel, trigger the
            // microphone permission prompt so that it is out of the way before the
            // user presses the "Hold" button for the first time.
            function loadWhisper() {
                setStatus('Loading')
                //let url     = 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin';
                let url     = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q8_0.bin';
                let dst     = 'whisper.bin';
                let size_mb = 42;
                model_whisper = 'tiny.en-q8_0';

                const statusEl    = document.getElementById('model-whisper-status');
                const fetchButton = document.getElementById('fetch-whisper-tiny-en');

                statusEl.innerHTML = 'loading "' + model_whisper + '" ... ';
                fetchButton.style.display = 'none';

                // callbacks are declared locally so they do not leak as implicit globals
                const cbProgress = function(p) {
                    let el = document.getElementById('fetch-whisper-progress');
                    el.innerHTML = Math.round(100*p) + '%';
                };
                const cbCancel = function() {
                    if (statusEl) statusEl.innerHTML = '';
                    // the button was hidden above; show it again so the download can be retried
                    if (fetchButton) fetchButton.style.display = '';
                };

                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);

                // init audio capture so that the user receives a permission request
                {
                    let context = new AudioContext({
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: false,
                        autoGainControl:  true,
                        noiseSuppression: true,
                    });
                    navigator.mediaDevices.getUserMedia({audio: true, video: false})
                        .then(function(stream) {
                            // only the permission prompt is wanted here; release the device immediately
                            stream.getTracks().forEach(function(track) {
                                track.stop();
                            });
                        })
                        .catch(function(err) {
                            printTextarea('js: error getting audio stream: ' + err);
                        });
                    context.close();
                }

                document.getElementById('toggler').style.display = 'block';
            }
            //
            // microphone
            //
            // sample rate requested for the AudioContext created in startRecording()
            const kSampleRate = 16000;
            // NOTE(review): not referenced anywhere in this page's inline script —
            // presumably carried over from another example; confirm before removing
            const kRestartRecording_s = 120;
            // NOTE(review): also not referenced in this page's inline script
            const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
            // recorder for the capture in progress; startRecording() resets it to null after decoding
            var mediaRecorder = null;
            // NOTE(review): never read or written in this page's inline script
            var doRecording = false;
            // timestamp (Date.now()) of the most recent startRecording() call
            var startTime = 0;
            // fall back to the webkit-prefixed constructors where the standard names are missing
            window.AudioContext = window.AudioContext || window.webkitAudioContext;
            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
            // Stop the active recording, if there is one. Stopping makes the recorder
            // deliver its data through `ondataavailable` and then fire `onstop`.
            function stopRecording() {
                // MediaRecorder.stop() throws InvalidStateError on an inactive recorder.
                // The recorder object stays around until decoding has finished, so a
                // second stop request during that window must be ignored.
                if (mediaRecorder && mediaRecorder.state !== 'inactive') {
                    mediaRecorder.stop();
                }
            }
            // Record from the microphone until stopRecording() is called, then decode
            // the captured audio, render it to a sample buffer and hand the first
            // channel to the C++ instance through Module.set_audio().
            function startRecording() {
                if (!context) {
                    context = new AudioContext({
                        sampleRate: kSampleRate,
                        channelCount: 1,
                        echoCancellation: false,
                        autoGainControl:  true,
                        noiseSuppression: true,
                    });
                }
                startTime = Date.now();
                var chunks = [];
                var stream = null;
                navigator.mediaDevices.getUserMedia({audio: true, video: false})
                    .then(function(s) {
                        stream = s;
                        mediaRecorder = new MediaRecorder(stream);
                        mediaRecorder.ondataavailable = function(e) {
                            chunks.push(e.data);
                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
                            var reader = new FileReader();
                            reader.onload = function(event) {
                                var buf = new Uint8Array(reader.result);
                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                                    var source = offlineContext.createBufferSource();
                                    source.buffer = audioBuffer;
                                    source.connect(offlineContext.destination);
                                    source.start(0);
                                    offlineContext.startRendering().then(function(renderedBuffer) {
                                        let audio = renderedBuffer.getChannelData(0);
                                        printTextarea('js: number of samples: ' + audio.length);
                                        Module.set_audio(instance, audio);
                                    });
                                    // the next press of the button starts from a clean state
                                    mediaRecorder = null;
                                    context = null;
                                }, function(err) {
                                    // without this callback a decode failure is silent and leaves
                                    // the recorder, the context and the status stuck
                                    printTextarea('js: error decoding audio: ' + err);
                                    mediaRecorder = null;
                                    context = null;
                                    setStatus('Failed');
                                });
                            }
                            reader.readAsArrayBuffer(blob);
                        };
                        mediaRecorder.onstop = function(e) {
                            // release the microphone once the recorder has stopped
                            stream.getTracks().forEach(function(track) {
                                track.stop();
                            });
                        };
                        mediaRecorder.start();
                    })
                    .catch(function(err) {
                        printTextarea('js: error getting audio stream: ' + err);
                    });
            }
            //
            // main
            //
            var nLines = 0;
            var movesAll = '';
            // document.body.addEventListener('keydown', function(event) {
            //     if (event.keyCode === 32) {
            //         document.getElementById('toggler').innerText = "";
            //         onStart();
            //     }
            // }, true);
            // document.body.addEventListener('keyup', function(event) {
            //     if (event.keyCode === 32) {
            //         document.getElementById('toggler').innerText = "Hold";
            //         onStop();
            //     }
            // }, true);
            // Press-and-hold handling for the "Hold" button, wired for both touch and
            // mouse input. Browsers follow an unhandled touch with emulated
            // mousedown/mouseup events, which would run onStart()/onStop() a second
            // time for a single tap; the touch handlers cancel the event to prevent it.
            document.getElementById('toggler').addEventListener("touchstart", function(event){
                if (event.cancelable) event.preventDefault();
                this.innerText = "";
                onStart();
            }, true);
            document.getElementById('toggler').addEventListener("touchend", function(event){
                if (event.cancelable) event.preventDefault();
                this.innerText = "Hold";
                onStop();
            }, true)
            document.getElementById('toggler').addEventListener('mousedown', function(event) {
                this.innerText = "";
                onStart();
            }, true);
            document.getElementById('toggler').addEventListener('mouseup', function(event) {
                this.innerText = "Hold";
                onStop();
            }, true);
            // Button pressed: start listening, but only once whisper is initialized.
            function onStart() {
                if (instance) {
                    setStatus('Listening');
                    startRecording();
                }
            }
            // Button released: stop the recording so the captured audio gets processed.
            function onStop() {
                // mirror the guard in onStart(): without an instance nothing was
                // started, and the status must not be left on 'Processing'
                if (!instance) return;
                setStatus('Processing');
                printTextarea('js: stopping recording ...');
                stopRecording();
            }
            // Called from the C++ side with the recognized move.
            //   move - move string for board.move(); a trailing '#' marks the end of
            //          the game; null or a string of at most one character means failure
            //   prob - number shown next to the move with two decimals
            function setMove(move, prob) {
                const whoseTurn = function() {
                    return move_count % 2 ? 'Black\'s turn' : 'White\'s turn';
                };

                // recognition failed: keep the board as it is and report whose turn it still is
                if (move == null || !(move.length > 1)) {
                    setStatus('Failed. ' + whoseTurn());
                    return;
                }

                const gameOver = move[move.length - 1] === '#';
                if (gameOver) {
                    // strip the marker and lock the button: no further moves are accepted
                    move = move.substring(0, move.length - 1);
                    document.getElementById('toggler').disabled = true;
                }

                board.move(move);

                movesAll += move + ', prob = ' + prob.toFixed(2) + '% <br>';
                nLines += 1;

                // keep at most 10 lines in the history by dropping the oldest one
                if (nLines > 10) {
                    const firstBreak = movesAll.indexOf('<br>');
                    if (firstBreak > 0) {
                        movesAll = movesAll.substring(firstBreak + 4);
                        nLines -= 1;
                    }
                }

                move_count += 1;
                setStatus(gameOver ? 'Done' : whoseTurn());
                document.getElementById('state-moves').innerHTML = movesAll;
            }
            // Called from the C++ side: show the current grammar text on the page
            // (assigned as HTML).
            function setGrammar(grammar) {
                const grammarEl = document.getElementById('state-grammar');
                grammarEl.innerHTML = grammar;
            }
        </script>
        <script type="text/javascript" src="js/chess.js"></script>
    </body>
 </html>
--- a/examples/wchess/wchess.wasm/jquery-3.7.1.min.js
+++ b/examples/wchess/wchess.wasm/jquery-3.7.1.min.js
--- a/examples/wchess/wchess.wasm/wchess.wasm.cpp
+++ b/examples/wchess/wchess.wasm/wchess.wasm.cpp
@ -0,0 +1,141 @@
 #include <WChess.h>
 #include <emscripten.h>
 #include <emscripten/bind.h>
 #include <thread>
 // upper bound for the whisper thread count chosen in wchess_main()
 constexpr int N_THREAD = 8;
 // four context slots; a null entry is unused (init() fills the first null slot)
 std::vector<struct whisper_context *> g_contexts(4, nullptr);
 // locked by get_audio(), free() and set_audio()
 std::mutex  g_mutex;
 // thread running wchess_main(), started by init()
 std::thread g_worker;
 // signalled by set_audio() and free() to wake up get_audio()
 std::condition_variable g_cv;
 // cleared by free(); get_audio() returns false once it is false
 bool g_running(false);
 // most recent audio handed over by set_audio(), consumed by get_audio()
 std::vector<float> g_pcmf32;
 // Forward a recognized move to the page by calling the JavaScript function
 // setMove(move, prob) through MAIN_THREAD_EM_ASM.
 //   move - passed as a C string and converted on the JS side with UTF8ToString
 //   prob - passed through unchanged as the second argument
 void set_move(const std::string & move, float prob) {
    MAIN_THREAD_EM_ASM({
        setMove(UTF8ToString($0), $1)
    }, move.c_str(), prob);
 }
 // Forward the grammar text to the page by calling the JavaScript function
 // setGrammar(grammar) through MAIN_THREAD_EM_ASM.
 //   grammar - passed as a C string and converted on the JS side with UTF8ToString
 void set_grammar(const std::string & grammar) {
    MAIN_THREAD_EM_ASM({
        setGrammar(UTF8ToString($0))
    }, grammar.c_str());
 }
 // Block until set_audio() has delivered samples or free() has requested a stop.
 //   audio - receives the pending samples (moved out of g_pcmf32)
 // Returns false when a stop was requested, true when `audio` was filled.
 bool get_audio(std::vector<float> & audio) {
    std::unique_lock<std::mutex> lock(g_mutex);
    g_cv.wait(lock, [] { return !g_running || !g_pcmf32.empty(); });
    if (!g_running) return false;
    audio = std::move(g_pcmf32);
    // A moved-from vector is only guaranteed to be in a valid state, not to be
    // empty. The wait predicate above depends on it being empty, so make that explicit.
    g_pcmf32.clear();
    return true;
 }
 // Worker thread body: configure whisper for short, grammar-constrained voice
 // commands, run the WChess loop on context slot `i` until it returns, then
 // release that context.
 //   i - index into g_contexts; out-of-range values are ignored
 void wchess_main(size_t i) {
    // validate the slot before it is used (previously it was only checked after the run)
    if (i >= g_contexts.size()) {
        return;
    }

    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

    // hardware_concurrency() may return 0 when the value cannot be determined;
    // never request fewer than one thread
    const int n_hw = (int) std::thread::hardware_concurrency();

    wparams.n_threads        = std::max(1, std::min(N_THREAD, n_hw));
    wparams.offset_ms        = 0;
    wparams.translate        = false;
    wparams.no_context       = true;
    wparams.single_segment   = true;
    wparams.print_realtime   = false;
    wparams.print_progress   = false;
    wparams.print_timestamps = true;
    wparams.print_special    = false;
    wparams.no_timestamps    = true;
    wparams.max_tokens       = 32;
    wparams.audio_ctx        = 1280; // partial encoder context for better performance
    wparams.temperature      = 0.0f;
    wparams.temperature_inc  = 2.0f;
    wparams.greedy.best_of   = 1;
    wparams.beam_search.beam_size = 1;
    wparams.language         = "en";
    wparams.grammar_penalty = 100.0;
    // example phrases in the expected vocabulary, passed to whisper as the initial prompt
    wparams.initial_prompt = "bishop to c3, rook to d4, knight to e5, d4 d5, knight to c3, c3, queen to d4, king b1, pawn to a1, bishop to b2, knight to c3,";
    printf("command: using %d threads\n", wparams.n_threads);

    WChess::callbacks cb;
    cb.get_audio = get_audio;
    cb.set_move = set_move;
    cb.set_grammar = set_grammar;

    WChess(g_contexts[i], wparams, cb, {}).run();

    // the run is over: free the context so init() can reuse the slot
    whisper_free(g_contexts[i]);
    g_contexts[i] = nullptr;
 }
 // JavaScript-facing API of the module.
 //   init(path_model)        - load the model into the first unused context slot and
 //                             start the worker; returns slot index + 1, or 0 on failure
 //   free(index)             - ask the worker to stop (the index is ignored)
 //   set_audio(index, audio) - hand samples to the worker; `index` is the value
 //                             returned by init(); returns 0 on success, -1 for an
 //                             out-of-range index, -2 for an uninitialized slot
 EMSCRIPTEN_BINDINGS(command) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                if (g_contexts[i] != nullptr) {
                    // Wait for a previous worker BEFORE raising g_running again:
                    // a worker that was just told to stop must not see the flag
                    // flip back to true, or it keeps waiting and the join hangs.
                    if (g_worker.joinable()) {
                        g_worker.join();
                    }
                    {
                        // g_running is read under g_mutex in get_audio()
                        std::lock_guard<std::mutex> lock(g_mutex);
                        g_running = true;
                    }
                    g_worker = std::thread([i]() {
                        wchess_main(i);
                    });
                    return i + 1;
                } else {
                    return (size_t) 0;
                }
            }
        }
        return (size_t) 0;
    }));
    emscripten::function("free", emscripten::optional_override([](size_t /* index */) {
        {
            std::unique_lock<std::mutex> lock(g_mutex);
            g_running = false;
        }
        // wake up get_audio() so the worker notices the stop request
        g_cv.notify_one();
    }));
    emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
        // indices handed out by init() are 1-based; 0 wraps around and is rejected below
        --index;
        if (index >= g_contexts.size()) {
            return -1;
        }
        if (g_contexts[index] == nullptr) {
            return -2;
        }
        {
            std::lock_guard<std::mutex> lock(g_mutex);
            const int n = audio["length"].as<int>();
            emscripten::val heap = emscripten::val::module_property("HEAPU8");
            emscripten::val memory = heap["buffer"];
            g_pcmf32.resize(n);
            // build a typed array of the same type as `audio` on top of g_pcmf32's
            // storage in the WASM heap and copy the samples into it
            emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
            memoryView.call<void>("set", audio);
        }
        g_cv.notify_one();
        return 0;
    }));
 }
--- a/Show More
+++ b/Show More
		`@ -0,0 +1,2 @@`
							`/! chessboard.js v1.0.0 \| (c) 2019 Chris Oakman \| MIT License chessboardjs.com/license /`
							.clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}