select utf8 codepage on windows

fix building with MSVC + SDL2
2025-07-01 23:10:47 +02:00 · 2025-02-19 17:00:39 +08:00 · 2025-02-19 14:43:42 +08:00
298 changed files with 20457 additions and 128088 deletions
--- a/.github/workflows/bindings-ruby.yml
+++ b/.github/workflows/bindings-ruby.yml
@ -19,12 +19,7 @@ on:
      - ggml/**/*.m
      - ggml/**/*.metal
      - scripts/get-flags.mk
-      - examples/common.h
-      - examples/common.cpp
-      - examples/common-whisper.h
-      - examples/common-whisper.cpp
-      - examples/stb_vorbis.c
-      - examples/miniaudio.h
+      - examples/dr_wav.h
  pull_request:
    paths:
      - bindings/ruby/**
@ -44,12 +39,7 @@ on:
      - ggml/**/*.m
      - ggml/**/*.metal
      - scripts/get-flags.mk
-      - examples/common.h
-      - examples/common.cpp
-      - examples/common-whisper.h
-      - examples/common-whisper.cpp
-      - examples/stb_vorbis.c
-      - examples/miniaudio.h
+      - examples/dr_wav.h

 jobs:
  ubuntu-22:
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -6,81 +6,17 @@ on:
      - master
  pull_request:
    types: [opened, synchronize, reopened]
-  workflow_dispatch:
-    inputs:
-      create_release:
-        description: 'Create new release'
-        required: true
-        type: boolean
-      pre_release_tag:
-        description: 'Pre-release tag name'
-        required: false
-        type: string
-      run_type:
-        description: 'Workflow type to run'
-        required: true
-        type: choice
-        options:
-          - full-ci
-          - release-only

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

-permissions:
-  contents: write  # for creating release
-
 env:
-  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  ubuntu_image: "ubuntu:22.04"
  VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"

 jobs:
-  determine-tag:
-    runs-on: ubuntu-latest
-    outputs:
-      tag_name: ${{ steps.tag.outputs.name }}
-
-    steps:
-      - name: Checkout with full history
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Determine tag name
-        id: tag
-        shell: bash
-        run: |
-          BUILD_NUMBER=$(git rev-list --count HEAD)
-          SHORT_HASH=$(git rev-parse --short=7 HEAD)
-          CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}"
-
-          echo "Raw values:"
-          echo "BUILD_NUMBER: $BUILD_NUMBER"
-          echo "SHORT_HASH: $SHORT_HASH"
-          echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
-          echo "CUSTOM_TAG: $CUSTOM_TAG"
-
-          # Use custom tag if provided
-          if [[ -n "$CUSTOM_TAG" ]]; then
-            echo "Using custom tag"
-            TAG_NAME="${CUSTOM_TAG}"
-          elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
-            echo "Using master branch format"
-            TAG_NAME="b${BUILD_NUMBER}"
-          else
-            echo "Using non-master branch format"
-            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
-            TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
-          fi
-
-          echo "Final tag name: $TAG_NAME"
-          echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
-
  ubuntu-22:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -107,8 +43,6 @@ jobs:
            cmake --build build --config Release -j $(nproc)'

  ubuntu-22-arm64:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -135,8 +69,6 @@ jobs:
            cmake --build build --config Release -j $(nproc)'

  ubuntu-22-arm-v7:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -163,25 +95,12 @@ jobs:
            cmake --build build --config Release -j $(nproc)'

  macOS-latest:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: macOS-latest

-    strategy:
-      matrix:
-        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
-
    steps:
      - name: Clone
-        id: checkout
        uses: actions/checkout@v4

-      - name: ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
-        with:
-          key: macOS-latest-swift
-          evict-old-files: 1d
-
      - name: Dependencies
        run: |
          brew update
@ -189,38 +108,28 @@ jobs:

      - name: Build
        run: |
-          sysctl -a
-          cmake -B build -G Xcode \
-            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DWHISPER_BUILD_EXAMPLES=OFF \
-            -DWHISPER_BUILD_TESTS=OFF \
-            -DWHISPER_BUILD_SERVER=OFF \
-            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
-          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+          cmake -B build
+          cmake --build build --config Release

-
-  freeBSD-latest:
-    runs-on: macos-13
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: Build
-        uses: cross-platform-actions/action@v0.27.0
-        with:
-          operating_system: freebsd
-          version: '14.2'
-          run: |
-            sudo pkg update
-            sudo pkg install -y gmake sdl2 cmake git
-            cmake -B build
-            cmake --build build --config Release
+#  freeBSD-latest:
+#    runs-on: macos-12
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Build
+#        uses: cross-platform-actions/action@v0.24.0
+#        with:
+#          operating_system: freebsd
+#          version: '13.3'
+#          run: |
+#            sudo pkg update
+#            sudo pkg install -y gmake sdl2 cmake
+#            cmake -B build
+#            cmake --build build --config Release

  ubuntu-22-gcc:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -249,8 +158,6 @@ jobs:
            ctest -L gh --output-on-failure'

  ubuntu-22-gcc-arm64:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -279,8 +186,6 @@ jobs:
            ctest -L gh --output-on-failure'

  ubuntu-22-gcc-arm-v7:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -309,8 +214,6 @@ jobs:
            ctest -L gh --output-on-failure'

  ubuntu-22-clang:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -342,8 +245,6 @@ jobs:
            ctest -L gh --output-on-failure'

  ubuntu-22-gcc-sanitized:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -372,8 +273,6 @@ jobs:
            ctest -L gh --output-on-failure'

  ubuntu-22-cmake-sycl:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -424,8 +323,6 @@ jobs:
          cmake --build . --config Release -j $(nproc)

  ubuntu-22-cmake-sycl-fp16:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -476,8 +373,6 @@ jobs:
          cmake --build . --config Release -j $(nproc)

  windows-msys2:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: windows-latest

    strategy:
@ -522,8 +417,6 @@ jobs:
            cmake --build build --config ${{ matrix.build }} -j $(nproc)

  windows:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: windows-latest

    strategy:
@ -584,8 +477,6 @@ jobs:
          path: build/bin/${{ matrix.build }}

  windows-blas:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: windows-latest

    strategy:
@ -659,8 +550,6 @@ jobs:
          path: build/bin/${{ matrix.build }}

  windows-cublas:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: windows-2019
    strategy:
      matrix:
@ -677,134 +566,15 @@ jobs:
      - name: Clone repository
        uses: actions/checkout@v4

-      - name: Install Ninja
-        id: install_ninja
-        run: |
-          choco install ninja
-
-      - name: Install ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
-        with:
-          key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }}
-          variant: sccache
-          evict-old-files: 5d
-
-      - name: Install Cuda Toolkit 11.8.0
-        if: ${{ matrix.cuda-toolkit == '11.8.0' }}
-        run: |
-          $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
-          $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
-          $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
-
-          # Components versions
-          $CUDART_VER = "11.8.89"
-          $NVCC_VER   = "11.8.89"
-          $NVRTC_VER  = "11.8.89"
-          $CUBLAS_VER = "11.8.1.74"
-          $NVTX_VER   = "11.8.86"
-          $VS_VER     = "11.8.86"
-          $NVPROF_VER = "11.8.87"
-          $CCCL_VER   = "11.8.89"
-
-          # Create the directory where the CUDA Toolkit will be installed
-          mkdir -p $CUDA_TOOLKIT_DIR
-
-          # Install unzip to extract the downloaded files
-          choco install unzip -y
-
-          # Download all the required components
-          curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
-
-          # Extract all the downloaded files to the CUDA Toolkit directory
-          unzip '*.zip' -d $CUDA_TOOLKIT_DIR
-
-          # Copy all the extracted files to the main CUDA Toolkit directory
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-
-          # Visual Studio integration
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y
-
-          # Set environment variables
-          echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-          echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-        
-      - name: Install Cuda Toolkit 12.2.0
-        if: ${{ matrix.cuda-toolkit == '12.2.0' }}
-        run: |
-          $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
-          $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
-          $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
-
-          # Components versions
-          $CUDART_VER   = "12.2.140"
-          $NVCC_VER     = "12.2.140"
-          $NVRTC_VER    = "12.2.140"
-          $CUBLAS_VER   = "12.2.5.6"
-          $NVTX_VER     = "12.2.140"
-          $PROFILER_VER = "12.2.140"
-          $VS_VER       = "12.2.140"
-          $NVPROF_VER   = "12.2.142"
-          $CCCL_VER     = "12.2.140"
-
-          # Create the directory where the CUDA Toolkit will be installed
-          mkdir -p $CUDA_TOOLKIT_DIR
-
-          # Install unzip to extract the downloaded files
-          choco install unzip -y
-
-          # Download all the required components
-          curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
-
-          # Extract all the downloaded files to the CUDA Toolkit directory
-          unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR
-
-          # Copy all the extracted files to the main CUDA Toolkit directory
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*"     "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*"   "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*"   "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*"     "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-
-          # Visual Studio integration
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y
-
-          # Set environment variables
-          echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-          echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v2

+      - name: Install CUDA Toolkit
+        id: cuda-toolkit
+        uses: Jimver/cuda-toolkit@v0.2.15
+        with:
+          cuda: '${{ matrix.cuda-toolkit }}'
+
      - name: Install 7-Zip
        run: choco install 7zip -y

@ -816,30 +586,25 @@ jobs:
          echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt

-      - name: Install cmake
-        run: choco install cmake
+      - name: Configure CMake
+        shell: cmd
+        run: |
+          cmake -S . -B ./build -A ${{ matrix.arch }} ^
+            -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
+            -DGGML_CUDA=${{ matrix.cublas }} ^
+            -DCMAKE_CUDA_ARCHITECTURES=all  ^
+            -DWHISPER_SDL2=${{ matrix.sdl2 }} ^
+            -DSDL2_DIR="%SDL2_DIR%"

      - name: Build Project
        shell: cmd
        run: |
-          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-          cmake --version
-          where cmake
-          cmake -S . -B build -G "Ninja Multi-Config" ^
-            -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
-            -DGGML_CUDA=${{ matrix.cublas }} ^
-            -DWHISPER_SDL2=${{ matrix.sdl2 }} ^
-            -DSDL2_DIR="%SDL2_DIR%"
-          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
-          cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS%
-
-      - name: Check sccache status after build
-        run: |
-          sccache --show-stats
+          cd ./build
+          cmake --build . --config ${{ matrix.build }}

      - name: Copy CUDA DLLs
        run: |
-          Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" |
+          Get-ChildItem "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/" -Filter "*.dll" |
          Copy-Item -Destination "build/bin/${{ matrix.build }}"

      - name: Copy SDL2.dll
@ -853,8 +618,6 @@ jobs:
          path: build/bin/${{ matrix.build }}

  emscripten:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    strategy:
@ -878,7 +641,6 @@ jobs:

  ios-xcode-build:
    runs-on: macos-latest
-    needs: determine-tag

    strategy:
      matrix:
@ -909,38 +671,20 @@ jobs:
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+          sudo cmake --install . --config Release

      - name: xcodebuild for swift package
        id: xcodebuild
        run: |
-          ./build-xcframework.sh
+          xcodebuild -scheme whisper-Package -destination 'generic/platform=iOS'

      - name: Build objc example
-        run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
+        run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO build

      - name: Build swiftui example
-        run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
-
-      - name: Pack artifacts
-        id: pack_artifacts
-        if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
-                github.event.inputs.create_release == 'true' ||
-                github.event.inputs.pre_release_tag != '' }}
-        run: |
-          zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework
-
-      - name: Upload artifacts
-        if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
-                github.event.inputs.create_release == 'true' ||
-                github.event.inputs.pre_release_tag != '' }}
-        uses: actions/upload-artifact@v4
-        with:
-          path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
-          name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework
+        run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build

  android:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    steps:
@ -969,30 +713,31 @@ jobs:
          cd whisper/examples/whisper.android
          ./gradlew assembleRelease --no-daemon

-  android_java:
-    runs-on: ubuntu-22.04
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: set up JDK 11
-        uses: actions/setup-java@v4
-        with:
-          java-version: '11'
-          distribution: 'temurin'
-          cache: gradle
-
-      - name: Setup Android SDK
-        uses: android-actions/setup-android@v3
-        with:
-          cmdline-tools-version: 9.0
-
-      - name: Build
-        run: |
-          cd examples/whisper.android.java
-          chmod +x ./gradlew
-          ./gradlew assembleRelease
+# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
+#  android_java:
+#    runs-on: ubuntu-22.04
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: set up JDK 11
+#        uses: actions/setup-java@v4
+#        with:
+#          java-version: '11'
+#          distribution: 'temurin'
+#          cache: gradle
+#
+#      - name: Setup Android SDK
+#        uses: android-actions/setup-android@v3
+#        with:
+#          cmdline-tools-version: 9.0
+#
+#      - name: Build
+#        run: |
+#          cd examples/whisper.android.java
+#          chmod +x ./gradlew
+#          ./gradlew assembleRelease

 # TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
 #  java:
@ -1039,8 +784,6 @@ jobs:
 #          PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}

  quantize:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
    runs-on: ubuntu-22.04

    steps:
@ -1053,69 +796,3 @@ jobs:
          cmake -B build
          cmake --build build --config Release
          ./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
-
-  release:
-    if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
-            github.event.inputs.create_release == 'true' ||
-            github.event.inputs.pre_release_tag != '' }}
-
-    runs-on: ubuntu-latest
-
-    needs:
-      - determine-tag
-      - ios-xcode-build
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
-        with:
-          key: release
-          evict-old-files: 1d
-
-      # Downloads all the artifacts from the previous jobs
-      - name: Download artifacts
-        id: download-artifact
-        uses: actions/download-artifact@v4
-        with:
-          path: ./artifact
-
-      - name: Move artifacts
-        id: move_artifacts
-        run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
-
-      - name: Create release
-        id: create_release
-        uses: ggml-org/action-create-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          tag_name: ${{ needs.determine-tag.outputs.tag_name }}
-          prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
-
-      - name: Upload release
-        id: upload_release
-        uses: actions/github-script@v3
-        with:
-          github-token: ${{secrets.GITHUB_TOKEN}}
-          script: |
-            const path = require('path');
-            const fs = require('fs');
-            const release_id = '${{ steps.create_release.outputs.id }}';
-            for (let file of await fs.readdirSync('./artifact/release')) {
-              if (path.extname(file) === '.zip') {
-                console.log('uploadReleaseAsset', file);
-                await github.repos.uploadReleaseAsset({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  release_id: release_id,
-                  name: file,
-                  data: await fs.readFileSync(`./artifact/release/${file}`)
-                });
-              }
-            }
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -28,8 +28,6 @@ jobs:

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
-        with:
-          image: tonistiigi/binfmt:qemu-v7.0.0-28

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
--- a/.github/workflows/examples-wasm.yml
+++ b/.github/workflows/examples-wasm.yml
@ -1,91 +0,0 @@
-name: Examples WASM
-on:
-  push:
-    branches: ["master"]
-
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pages: write
-  id-token: write
-
-concurrency:
-  group: "pages"
-  cancel-in-progress: false
-
-jobs:
-  deploy-wasm-github-pages:
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Setup Pages
-        uses: actions/configure-pages@v4
-
-      - name: Setup emsdk
-        uses: mymindstorm/setup-emsdk@v14
-
-      - name: Build WASM Examples
-        # Enable for real build later in whisper.cpp
-        run: |
-          mkdir -p build-em && cd build-em
-          emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
-          make -j
-
-      - name: Create staging directory
-        run: mkdir -p staging
-
-      - name: Create .nojekyll file in staging directory
-        run: touch staging/.nojekyll
-
-      - name: Copy application files
-        run: |
-          build_dir=build-em/bin
-
-          ls ${build_dir}
-
-          # command.wasm
-          target_dir=staging/command.wasm
-          mkdir -p ${target_dir}
-          cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libcommand.js ${target_dir}
-
-          # bench.wasm
-          target_dir=staging/bench.wasm
-          mkdir -p ${target_dir}
-          cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libbench.js ${target_dir}
-
-          # stream.wasm
-          target_dir=staging/stream.wasm
-          mkdir -p ${target_dir}
-          cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libstream.js ${target_dir}
-
-          # whisper.wasm (this will be the main example page)
-          target_dir=staging
-          mkdir -p ${target_dir}
-          cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libmain.js ${target_dir}
-
-          # Copy Cross-Origin Isolation service worker
-          cp -v examples/coi-serviceworker.js staging/
-
-      - name: List files in staging directory (for debugging)
-        run: |
-          echo "Files in staging directory:"
-          find staging -type f | sort
-
-      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
-        with:
-          path: ./staging
-
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v4
--- a/.gitignore
+++ b/.gitignore
@ -58,5 +58,3 @@ cmake-build-debug/
 .cxx/
 .gradle/
 local.properties
-.log
-.exe
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -38,13 +38,8 @@ if (EMSCRIPTEN)

    # TODO: without these, we get the following error:
    #       wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
-    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
-
-    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s TOTAL_STACK=5242880")
-    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s TOTAL_STACK=5242880")
-
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
+    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -s TOTAL_STACK=5242880")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
 else()
    if (MINGW)
        set(BUILD_SHARED_LIBS_DEFAULT OFF)
@ -67,8 +62,7 @@ option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings"
 option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)

 # build
-option(WHISPER_FATAL_WARNINGS  "whisper: enable -Werror flag"               OFF)
-option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
+option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)

 # sanitizers
 option(WHISPER_SANITIZE_THREAD    "whisper: enable thread sanitizer"    OFF)
@ -127,15 +121,7 @@ whisper_option_depr(WARNING     WHISPER_SYCL_F16            GGML_SYCL_F16)
 #

 if (NOT TARGET ggml)
-    if (WHISPER_USE_SYSTEM_GGML)
-        find_package(ggml REQUIRED)
-        if (NOT ggml_FOUND)
-            message(FATAL_ERROR "System-installed GGML library not found.")
-        endif()
-        add_library(ggml ALIAS ggml::ggml)
-    else()
-        add_subdirectory(ggml)
-    endif()
+    add_subdirectory(ggml)
    # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
 add_subdirectory(src)
--- a/13
+++ b/13
@ -18,6 +18,17 @@ samples:
 	@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
 	@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
 	@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
+	@echo "Converting to 16-bit WAV ..."
+	@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
+	@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
+	@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
+	@rm samples/*.ogg
+	@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
+	@rm samples/mm1.wav
+	@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
+	@rm samples/a13.mp3
+	@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
+	@rm samples/diffusion2023-07-03.flac

 #
 # Models
@ -48,7 +59,7 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 larg
 	@echo "Running $@ on all samples in ./samples ..."
 	@echo "==============================================="
 	@echo ""
-	@for f in samples/*$(.flac .mp3 .ogg .wav); do \
+	@for f in samples/*.wav; do \
 		echo "----------------------------------------------" ; \
 		echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
 	    echo "----------------------------------------------" ; \
--- a/Package.swift
+++ b/Package.swift
@ -0,0 +1,19 @@
+// swift-tools-version:5.5
+
+import PackageDescription
+
+let package = Package(
+    name: "whisper",
+    platforms: [
+        .macOS(.v12),
+        .iOS(.v14),
+        .watchOS(.v4),
+        .tvOS(.v14)
+    ],
+    products: [
+        .library(name: "whisper", targets: ["whisper"]),
+    ],
+    targets: [
+        .systemLibrary(name: "whisper", pkgConfig: "whisper"),
+    ]
+)
--- a/README.md
+++ b/README.md
@ -184,11 +184,11 @@ speed-up - more than x3 faster compared with CPU-only execution. Here are the in
  ```

  - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
-  - Python 3.11 is recommended.
+  - Python 3.10 is recommended.
  - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
  - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
-    - To create an environment, use: `conda create -n py311-whisper python=3.11 -y`
-    - To activate the environment, use: `conda activate py311-whisper`
+    - To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
+    - To activate the environment, use: `conda activate py310-whisper`

 - Generate a Core ML model. For example, to generate a `base.en` model, use:

@ -427,8 +427,7 @@ For detailed instructions on how to use Conan, please refer to the [Conan docume

 This is a naive example of performing real-time inference on audio from your microphone.
 The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
-More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10). 
-You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly. 
+More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).

 ```bash
 cmake -B build -DWHISPER_SDL2=ON
--- a/Sources/whisper/module.modulemap
+++ b/Sources/whisper/module.modulemap
@ -0,0 +1,5 @@
+module whisper [system] {
+    header "whisper.h"
+    link "whisper"
+    export *
+}
--- a/Sources/whisper/whisper.h
+++ b/Sources/whisper/whisper.h
@ -0,0 +1,4 @@
+#pragma once
+
+#include <whisper.h>
+
--- a/bindings/go/Makefile
+++ b/bindings/go/Makefile
@ -11,11 +11,11 @@ UNAME_M := $(shell uname -m)
 endif

 GGML_METAL_PATH_RESOURCES := $(abspath ../..)
-BUILD_DIR := build_go
+BUILD_DIR := build
 MODELS_DIR := models
 EXAMPLES_DIR := $(wildcard examples/*)
 INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
-LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
+LIBRARY_PATH := $(abspath ../..)

 ifeq ($(GGML_CUDA),1)
 	LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
@ -29,10 +29,8 @@ endif
 all: clean whisper examples

 whisper: mkdir
-	cmake -S ../.. -B ../../${BUILD_DIR} \
-		-DCMAKE_BUILD_TYPE=Release \
-		-DBUILD_SHARED_LIBS=OFF
-	cmake --build ../../${BUILD_DIR} --target whisper
+	@echo Build whisper
+	@${MAKE} -C ../.. libwhisper.a

 test: model-small whisper modtidy
 ifeq ($(UNAME_S),Darwin)
--- a/bindings/go/README.md
+++ b/bindings/go/README.md
@ -31,7 +31,7 @@ func main() {
 	if err != nil {
 		panic(err)
 	}
-	if err := context.Process(samples, nil, nil, nil); err != nil {
+	if err := context.Process(samples, nil, nil); err != nil {
 		return err
 	}

--- a/bindings/go/examples/go-model-download/context.go
+++ b/bindings/go/examples/go-model-download/context.go
@ -9,23 +9,22 @@ import (
 // ContextForSignal returns a context object which is cancelled when a signal
 // is received. It returns nil if no signal parameter is provided
 func ContextForSignal(signals ...os.Signal) context.Context {
-    if len(signals) == 0 {
-        return nil
-    }
+	if len(signals) == 0 {
+		return nil
+	}

-    ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
-    ctx, cancel := context.WithCancel(context.Background())
+	ch := make(chan os.Signal)
+	ctx, cancel := context.WithCancel(context.Background())

-    // Send message on channel when signal received
-    signal.Notify(ch, signals...)
+	// Send message on channel when signal received
+	signal.Notify(ch, signals...)

-    // When any signal is received, call cancel
-    go func() {
-        <-ch
-        cancel()
-    }()
+	// When any signal is received, call cancel
+	go func() {
+		<-ch
+		cancel()
+	}()

-    // Return success
-    return ctx
+	// Return success
+	return ctx
 }
-
--- a/bindings/go/examples/go-model-download/main.go
+++ b/bindings/go/examples/go-model-download/main.go
@ -9,7 +9,6 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
-	"strings"
 	"syscall"
 	"time"
 )
@ -18,27 +17,14 @@ import (
 // CONSTANTS

 const (
-	srcUrl  = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
-	srcExt  = ".bin"                                                       // Filename extension
-	bufSize = 1024 * 64                                                    // Size of the buffer used for downloading the model
+	srcUrl  = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
+	srcExt  = ".bin"                                                      // Filename extension
+	bufSize = 1024 * 64                                                   // Size of the buffer used for downloading the model
 )

 var (
 	// The models which will be downloaded, if no model is specified as an argument
-	modelNames = []string{
-		"tiny", "tiny-q5_1", "tiny-q8_0",
-		"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
-		"base", "base-q5_1", "base-q8_0",
-		"base.en", "base.en-q5_1", "base.en-q8_0",
-		"small", "small-q5_1", "small-q8_0",
-		"small.en", "small.en-q5_1", "small.en-q8_0",
-		"medium", "medium-q5_0", "medium-q8_0",
-		"medium.en", "medium.en-q5_0", "medium.en-q8_0",
-		"large-v1",
-		"large-v2", "large-v2-q5_0", "large-v2-q8_0",
-		"large-v3", "large-v3-q5_0",
-		"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
-	}
+	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
 )

 var (
@ -58,25 +44,7 @@ var (
 func main() {
 	flag.Usage = func() {
 		name := filepath.Base(flag.CommandLine.Name())
-		fmt.Fprintf(flag.CommandLine.Output(), `
-			Usage: %s [options] [<model>...]
-
-			Options:
-  			-out string     Specify the output folder where models will be saved.
-                  			Default: Current working directory.
-  			-timeout duration Set the maximum duration for downloading a model.
-            			      Example: 10m, 1h (default: 30m0s).
-  			-quiet           Suppress all output except errors.
-
-			Examples:
-  			1. Download a specific model:
-     			%s -out ./models tiny-q8_0
-
-			  2. Download all models:
-     			%s -out ./models
-
-			`, name, name, name)
-
+		fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
 		flag.PrintDefaults()
 	}
 	flag.Parse()
@ -146,87 +114,23 @@ func GetOut() (string, error) {
 // GetModels returns the list of models to download
 func GetModels() []string {
 	if flag.NArg() == 0 {
-		fmt.Println("No model specified.")
-		fmt.Println("Preparing to download all models...")
-
-		// Calculate total download size
-		fmt.Println("Calculating total download size...")
-		totalSize, err := CalculateTotalDownloadSize(modelNames)
-		if err != nil {
-			fmt.Println("Error calculating download sizes:", err)
-			os.Exit(1)
-		}
-
-		fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
-		fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
-		fmt.Println("Would you like to download all models? (y/N)")
-
-		// Prompt for user input
-		var response string
-		fmt.Scanln(&response)
-		if response != "y" && response != "Y" {
-			fmt.Println("Aborting. Specify a model to download.")
-			os.Exit(0)
-		}
-
-		return modelNames // Return all models if confirmed
+		return modelNames
+	} else {
+		return flag.Args()
 	}
-	return flag.Args() // Return specific models if arguments are provided
-}
-
-func CalculateTotalDownloadSize(models []string) (int64, error) {
-	var totalSize int64
-	client := http.Client{}
-
-	for _, model := range models {
-		modelURL, err := URLForModel(model)
-		if err != nil {
-			return 0, err
-		}
-
-		// Issue a HEAD request to get the file size
-		req, err := http.NewRequest("HEAD", modelURL, nil)
-		if err != nil {
-			return 0, err
-		}
-
-		resp, err := client.Do(req)
-		if err != nil {
-			return 0, err
-		}
-		resp.Body.Close()
-
-		if resp.StatusCode != http.StatusOK {
-			fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
-			continue
-		}
-
-		size := resp.ContentLength
-		totalSize += size
-	}
-	return totalSize, nil
 }

 // URLForModel returns the URL for the given model on huggingface.co
 func URLForModel(model string) (string, error) {
-	// Ensure "ggml-" prefix is added only once
-	if !strings.HasPrefix(model, "ggml-") {
-		model = "ggml-" + model
-	}
-
-	// Ensure ".bin" extension is added only once
 	if filepath.Ext(model) != srcExt {
 		model += srcExt
 	}
-
-	// Parse the base URL
 	url, err := url.Parse(srcUrl)
 	if err != nil {
 		return "", err
+	} else {
+		url.Path = filepath.Join(url.Path, model)
 	}
-
-	// Ensure no trailing slash in the base URL
-	url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
 	return url.String(), nil
 }

--- a/bindings/go/examples/go-whisper/process.go
+++ b/bindings/go/examples/go-whisper/process.go
@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
 	// Process the data
 	fmt.Fprintf(flags.Output(), "  ...processing %q\n", path)
 	context.ResetTimings()
-	if err := context.Process(data, nil, cb, nil); err != nil {
+	if err := context.Process(data, cb, nil); err != nil {
 		return err
 	}

--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@ -71,10 +71,6 @@ func (context *context) Language() string {
 	return whisper.Whisper_lang_str(context.params.Language())
 }

-func (context *context) DetectedLanguage() string {
-	return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id())
-}
-
 // Set translate flag
 func (context *context) SetTranslate(v bool) {
 	context.params.SetTranslate(v)
@ -193,7 +189,6 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
 // Process new sample data and return any errors
 func (context *context) Process(
 	data []float32,
-	callEncoderBegin EncoderBeginCallback,
 	callNewSegment SegmentCallback,
 	callProgress ProgressCallback,
 ) error {
@ -208,20 +203,7 @@ func (context *context) Process(
 	// We don't do parallel processing at the moment
 	processors := 0
 	if processors > 1 {
-		if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin,
-			func(new int) {
-				if callNewSegment != nil {
-					num_segments := context.model.ctx.Whisper_full_n_segments()
-					s0 := num_segments - new
-					for i := s0; i < num_segments; i++ {
-						callNewSegment(toSegment(context.model.ctx, i))
-					}
-				}
-			}); err != nil {
-			return err
-		}
-	} else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin,
-		func(new int) {
+		if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
 			if callNewSegment != nil {
 				num_segments := context.model.ctx.Whisper_full_n_segments()
 				s0 := num_segments - new
@ -229,11 +211,22 @@ func (context *context) Process(
 					callNewSegment(toSegment(context.model.ctx, i))
 				}
 			}
-		}, func(progress int) {
-			if callProgress != nil {
-				callProgress(progress)
-			}
 		}); err != nil {
+			return err
+		}
+	} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
+		if callNewSegment != nil {
+			num_segments := context.model.ctx.Whisper_full_n_segments()
+			s0 := num_segments - new
+			for i := s0; i < num_segments; i++ {
+				callNewSegment(toSegment(context.model.ctx, i))
+			}
+		}
+	}, func(progress int) {
+		if callProgress != nil {
+			callProgress(progress)
+		}
+	}); err != nil {
 		return err
 	}

--- a/bindings/go/pkg/whisper/context_test.go
+++ b/bindings/go/pkg/whisper/context_test.go
@ -88,37 +88,6 @@ func TestProcess(t *testing.T) {
 	context, err := model.NewContext()
 	assert.NoError(err)

-	err = context.Process(data, nil, nil, nil)
+	err = context.Process(data, nil, nil)
 	assert.NoError(err)
 }
-
-func TestDetectedLanguage(t *testing.T) {
-	assert := assert.New(t)
-
-	fh, err := os.Open(SamplePath)
-	assert.NoError(err)
-	defer fh.Close()
-
-	// Decode the WAV file - load the full buffer
-	dec := wav.NewDecoder(fh)
-	buf, err := dec.FullPCMBuffer()
-	assert.NoError(err)
-	assert.Equal(uint16(1), dec.NumChans)
-
-	data := buf.AsFloat32Buffer().Data
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	context, err := model.NewContext()
-	assert.NoError(err)
-
-	err = context.Process(data, nil, nil, nil)
-	assert.NoError(err)
-
-	expectedLanguage := "en"
-	actualLanguage := context.DetectedLanguage()
-	assert.Equal(expectedLanguage, actualLanguage)
-}
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@ -16,10 +16,6 @@ type SegmentCallback func(Segment)
 // processing. It is called during the Process function
 type ProgressCallback func(int)

-// EncoderBeginCallback is the callback function for checking if we want to
-// continue processing. It is called during the Process function
-type EncoderBeginCallback func() bool
-
 // Model is the interface to a whisper model. Create a new model with the
 // function whisper.New(string)
 type Model interface {
@ -35,13 +31,12 @@ type Model interface {
 	Languages() []string
 }

-// Context is the speech recognition context.
+// Context is the speech recognition context.
 type Context interface {
 	SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
 	SetTranslate(bool)        // Set translate flag
 	IsMultilingual() bool     // Return true if the model is multilingual.
 	Language() string         // Get language
-	DetectedLanguage() string // Get detected language

 	SetOffset(time.Duration)          // Set offset
 	SetDuration(time.Duration)        // Set duration
@ -63,7 +58,7 @@ type Context interface {
 	// Process mono audio data and return any errors.
 	// If defined, newly generated segments are passed to the
 	// callback function during processing.
-	Process([]float32, EncoderBeginCallback, SegmentCallback, ProgressCallback) error
+	Process([]float32, SegmentCallback, ProgressCallback) error

 	// After process is called, return segments until the end of the stream
 	// is reached, when io.EOF is returned.
--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@ -9,7 +9,7 @@ import (
 // CGO

 /*
-#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu  -lm -lstdc++ -fopenmp
+#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
 #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
 #include <whisper.h>
 #include <stdlib.h>
--- a/bindings/java/build.gradle
+++ b/bindings/java/build.gradle
@ -25,13 +25,13 @@ sourceSets {
 }

 tasks.register('copyLibwhisperDynlib', Copy) {
-    from '../../build/src'
-    include 'libwhisper.dylib'
+    from '../../build'
+    include 'libwhisper.dynlib'
    into 'build/generated/resources/main/darwin'
 }

 tasks.register('copyLibwhisperSo', Copy) {
-    from '../../build/src'
+    from '../../build'
    include 'libwhisper.so'
    into 'build/generated/resources/main/linux-x86-64'
 }
@ -55,12 +55,7 @@ java {
    withJavadocJar()
 }

-sourcesJar() {
-    dependsOn copyLibs
-}
-
 jar {
-    dependsOn copyLibs
    exclude '**/whisper_java.exp', '**/whisper_java.lib'
 }

@ -72,9 +67,6 @@ tasks.withType(Test) {
    useJUnitPlatform()
 }

-test.dependsOn copyLibs
-processResources.dependsOn copyLibs
-
 dependencies {
    implementation "net.java.dev.jna:jna:5.13.0"
    testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
--- a/bindings/java/gradlew
+++ b/bindings/java/gradlew
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperConstants.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperConstants.java
@ -1,24 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-/**
- * Presets for alignment heads in DTW token timestamps
- */
-public class WhisperConstants {
-    // Alignment heads presets
-    public static final int WHISPER_AHEADS_NONE = 0;
-    public static final int WHISPER_AHEADS_TINY_EN = 1;
-    public static final int WHISPER_AHEADS_TINY = 2;
-    public static final int WHISPER_AHEADS_BASE_EN = 3;
-    public static final int WHISPER_AHEADS_BASE = 4;
-    public static final int WHISPER_AHEADS_SMALL_EN = 5;
-    public static final int WHISPER_AHEADS_SMALL = 6;
-    public static final int WHISPER_AHEADS_MEDIUM_EN = 7;
-    public static final int WHISPER_AHEADS_MEDIUM = 8;
-    public static final int WHISPER_AHEADS_LARGE_V1 = 9;
-    public static final int WHISPER_AHEADS_LARGE_V2 = 10;
-    public static final int WHISPER_AHEADS_LARGE_V3 = 11;
-    public static final int WHISPER_AHEADS_LARGE_V3_TURBO = 12;
-    public static final int WHISPER_AHEADS_CUSTOM = 13;
-    public static final int WHISPER_AHEADS_N_TOP_MOST = 14;
-    public static final int WHISPER_AHEADS_COUNT = 15;
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
@ -1,9 +1,7 @@
 package io.github.ggerganov.whispercpp;

-import com.sun.jna.NativeLong;
 import com.sun.jna.Structure;
 import com.sun.jna.ptr.PointerByReference;
-import com.sun.jna.Pointer;
 import io.github.ggerganov.whispercpp.ggml.GgmlType;
 import io.github.ggerganov.whispercpp.WhisperModel;
 import io.github.ggerganov.whispercpp.params.WhisperContextParams;
@ -11,26 +9,33 @@ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 import java.util.List;

 public class WhisperContext extends Structure {
-    public NativeLong t_load_us;
-    public NativeLong t_start_us;
+    int t_load_us = 0;
+    int t_start_us = 0;

    /** weight type (FP32 / FP16 / QX) */
-    public GgmlType wtype = GgmlType.GGML_TYPE_F16;
+    GgmlType wtype = GgmlType.GGML_TYPE_F16;
    /** intermediate type (FP32 or FP16) */
-    public GgmlType itype = GgmlType.GGML_TYPE_F16;
+    GgmlType itype = GgmlType.GGML_TYPE_F16;

-    public WhisperContextParams.ByValue params;
-
-    public Pointer model;
-    public Pointer vocab;
-    public Pointer state;
+//    WhisperModel model;
+    public PointerByReference model;
+//    whisper_vocab vocab;
+//    whisper_state * state = nullptr;
+    public PointerByReference vocab;
+    public PointerByReference state;

    /** populated by whisper_init_from_file_with_params() */
-    public Pointer path_model;
+    String path_model;
+    WhisperContextParams params;

-    @Override
-    protected List<String> getFieldOrder() {
-        return List.of("t_load_us", "t_start_us", "wtype", "itype",
-                "params", "model", "vocab", "state", "path_model");
-    }
+//    public static class ByReference extends WhisperContext implements Structure.ByReference {
+//    }
+//
+//    public static class ByValue extends WhisperContext implements Structure.ByValue {
+//    }
+//
+//    @Override
+//    protected List<String> getFieldOrder() {
+//        return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
+//    }
 }
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
@ -43,11 +43,11 @@ public class WhisperCpp implements AutoCloseable {
     * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
     * @param params - params to use when initialising the context
     */
-    public void initContext(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
+    public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
        initContextImpl(modelPath, params);
    }

-    private void initContextImpl(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
+    private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
        if (ctx != null) {
            lib.whisper_free(ctx);
        }
@ -69,13 +69,15 @@ public class WhisperCpp implements AutoCloseable {

    /**
     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
-     * Returns a ByValue instance to ensure proper parameter passing to native code.
+     * Because this function allocates memory for the params, the caller must free it with either:
+     * - `whisper_free_context_params()`, or
+     * - `Native.free(Pointer.nativeValue(pointer));`
     */
-    public WhisperContextParams.ByValue getContextDefaultParams() {
-        WhisperContextParams.ByValue valueParams = new WhisperContextParams.ByValue(
-            lib.whisper_context_default_params_by_ref());
-        valueParams.read();
-        return valueParams;
+    public WhisperContextParams getContextDefaultParams() {
+        paramsPointer = lib.whisper_context_default_params_by_ref();
+        WhisperContextParams params = new WhisperContextParams(paramsPointer);
+        params.read();
+        return params;
    }
    
    /**
@ -86,7 +88,7 @@ public class WhisperCpp implements AutoCloseable {
     *
     * @param strategy - GREEDY
     */
-    public WhisperFullParams.ByValue getFullDefaultParams(WhisperSamplingStrategy strategy) {
+    public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
        Pointer pointer;

        // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
@ -102,7 +104,7 @@ public class WhisperCpp implements AutoCloseable {
            pointer = beamParamsPointer;
        }

-        WhisperFullParams.ByValue params = new WhisperFullParams.ByValue(pointer);
+        WhisperFullParams params = new WhisperFullParams(pointer);
        params.read();
        return params;
    }
@ -136,21 +138,15 @@ public class WhisperCpp implements AutoCloseable {
    }

    /**
-     * Run the entire model: PCM -&gt; log mel spectrogram -&gt; encoder -&gt; decoder -&gt; text.
+     * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
     * Not thread safe for same context
     * Uses the specified decoding strategy to obtain the text.
     */
-    public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
+    public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
        if (ctx == null) {
            throw new IllegalStateException("Model not initialised");
        }

-        /*
-        WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
-            lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
-        valueParams.read();
-        */
-
        if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
            throw new IOException("Failed to process audio");
        }
@ -167,17 +163,12 @@ public class WhisperCpp implements AutoCloseable {

        return str.toString().trim();
    }
-
    public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
        if (ctx == null) {
            throw new IllegalStateException("Model not initialised");
        }

-        WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
-            lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
-        valueParams.read();
-
-        if (lib.whisper_full(ctx, valueParams, audioData, audioData.length) != 0) {
+        if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
            throw new IOException("Failed to process audio");
        }

--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
@ -38,7 +38,7 @@ public interface WhisperCppJnaLibrary extends Library {
     * @param params     Pointer to whisper_context_params
     * @return Whisper context on success, null on failure
     */
-    Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams.ByValue params);
+    Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);

    /**
     * Allocate (almost) all memory needed for the model by loading from a buffer.
@ -180,12 +180,12 @@ public interface WhisperCppJnaLibrary extends Library {
    /**
     * @return the id of the specified language, returns -1 if not found.
     * Examples:
-     *   "de" -&gt; 2
-     *   "german" -&gt; 2
+     *   "de" -> 2
+     *   "german" -> 2
     */
    int whisper_lang_id(String lang);

-    /** @return the short string of the specified language id (e.g. 2 -&gt; "de"), returns nullptr if not found */
+    /** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
    String whisper_lang_str(int id);

    /**
@ -268,21 +268,20 @@ public interface WhisperCppJnaLibrary extends Library {
    void whisper_free_params(Pointer params);

    /**
-     * Run the entire model: PCM -&gt; log mel spectrogram -&gt; encoder -&gt; decoder -&gt; text
+     * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
     * Not thread safe for same context
     * Uses the specified decoding strategy to obtain the text.
     */
-    int whisper_full(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples);
+    int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);

-    public int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams.ByValue params, float[] samples, int n_samples);
-    //int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
+    int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);

    // Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
    // Result is stored in the default state of the context
    // Not thread safe if executed in parallel on the same context.
    // It seems this approach can offer some speedup in some cases.
    // However, the transcription accuracy can be worse at the beginning and end of each chunk.
-    int whisper_full_parallel(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples, int n_processors);
+    int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);

    /**
     * Number of generated text segments.
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/GgmlAbortCallback.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/GgmlAbortCallback.java
@ -1,17 +0,0 @@
-package io.github.ggerganov.whispercpp.callbacks;
-
-import com.sun.jna.Callback;
-
-/**
- * Callback for aborting GGML computation
- * Maps to the C typedef: bool (*ggml_abort_callback)(void * data)
- */
-public interface GgmlAbortCallback extends Callback {
-    /**
-     * Return true to abort the computation, false to continue
-     *
-     * @param data User data passed to the callback
-     * @return true to abort, false to continue
-     */
-    boolean invoke(com.sun.jna.Pointer data);
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperAhead.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperAhead.java
@ -1,30 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-import com.sun.jna.*;
-import java.util.Arrays;
-import java.util.List;
-
-public class WhisperAhead extends Structure {
-
-    public int n_text_layer;
-
-    public int n_head;
-
-    public WhisperAhead() {
-        super();
-    }
-
-    public WhisperAhead(int textLayer, int head) {
-        super();
-        this.n_text_layer = textLayer;
-        this.n_head = head;
-    }
-
-    @Override
-    protected List<String> getFieldOrder() {
-        return Arrays.asList("n_text_layer", "n_head");
-    }
-
-    public static class ByReference extends WhisperAhead implements Structure.ByReference {}
-
-    public static class ByValue extends WhisperAhead implements Structure.ByValue {}
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperAheads.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperAheads.java
@ -1,41 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-import com.sun.jna.*;
-import java.util.Arrays;
-import java.util.List;
-
-public class WhisperAheads extends Structure {
-    public NativeLong n_heads;
-
-    public Pointer heads;
-
-    public WhisperAheads() {
-        super();
-    }
-
-    /**
-     * Create alignment heads from an array of WhisperAhead objects
-     */
-    public void setHeads(WhisperAhead[] aheadsArray) {
-        this.n_heads = new NativeLong(aheadsArray.length);
-
-        int structSize = aheadsArray[0].size();
-        Memory mem = new Memory(structSize * aheadsArray.length);
-
-        for (int i = 0; i < aheadsArray.length; i++) {
-            aheadsArray[i].write();
-            byte[] buffer = aheadsArray[i].getPointer().getByteArray(0, structSize);
-            mem.write(i * structSize, buffer, 0, buffer.length);
-        }
-
-        this.heads = mem;
-    }
-
-    @Override
-    protected List<String> getFieldOrder() {
-        return Arrays.asList("n_heads", "heads");
-    }
-
-    public static class ByReference extends WhisperAheads implements Structure.ByReference {}
-
-    public static class ByValue extends WhisperAheads implements Structure.ByValue {}
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java
@ -1,5 +1,7 @@
 package io.github.ggerganov.whispercpp.params;
+
 import com.sun.jna.*;
+
 import java.util.Arrays;
 import java.util.List;

@ -9,73 +11,21 @@ import java.util.List;
 * whisper_context_default_params()
 */
 public class WhisperContextParams extends Structure {
+
    public WhisperContextParams(Pointer p) {
        super(p);
    }

-    public WhisperContextParams() {
-        super();
-    }
-
-    /** Use GPU for inference (default = true) */
+    /** Use GPU for inference (default = true) */
    public CBool use_gpu;

-    /** Use flash attention (default = false) */
-    public CBool flash_attn;
-
-    /** CUDA device to use (default = 0) */
-    public int gpu_device;
-
-    /** [EXPERIMENTAL] Enable token-level timestamps with DTW (default = false) */
-    public CBool dtw_token_timestamps;
-
-    /** [EXPERIMENTAL] Alignment heads preset for DTW */
-    public int dtw_aheads_preset;
-
-    /** Number of top layers to use for DTW when using WHISPER_AHEADS_N_TOP_MOST preset */
-    public int dtw_n_top;
-
-    public WhisperAheads.ByValue dtw_aheads;
-
-    /** DTW memory size (internal use) */
-    public NativeLong dtw_mem_size;
-
-    /** Use GPU for inference */
+    /** Use GPU for inference (default = true) */
    public void useGpu(boolean enable) {
        use_gpu = enable ? CBool.TRUE : CBool.FALSE;
    }

-    /** Use flash attention */
-    public void useFlashAttn(boolean enable) {
-        flash_attn = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Enable DTW token-level timestamps */
-    public void enableDtwTokenTimestamps(boolean enable) {
-        dtw_token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Set DTW alignment heads preset */
-    public void setDtwAheadsPreset(int preset) {
-        dtw_aheads_preset = preset;
-    }
-
    @Override
    protected List<String> getFieldOrder() {
-        return Arrays.asList(
-            "use_gpu",
-            "flash_attn",
-            "gpu_device",
-            "dtw_token_timestamps",
-            "dtw_aheads_preset",
-            "dtw_n_top",
-            "dtw_aheads",
-            "dtw_mem_size"
-        );
-    }
-
-    public static class ByValue extends WhisperContextParams implements Structure.ByValue {
-        public ByValue() { super(); }
-        public ByValue(Pointer p) { super(p); }
+        return Arrays.asList("use_gpu");
    }
 }
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java
@ -5,7 +5,6 @@ import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
 import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
 import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
 import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
-import io.github.ggerganov.whispercpp.callbacks.GgmlAbortCallback;

 import java.util.Arrays;
 import java.util.List;
@ -17,12 +16,10 @@ import java.util.List;
 */
 public class WhisperFullParams extends Structure {

-    public WhisperFullParams() {
-        super();
-    }
-
    public WhisperFullParams(Pointer p) {
        super(p);
+//        super(p, ALIGN_MSVC);
+//        super(p, ALIGN_GNUC);
    }

    /** Sampling strategy for whisper_full() function. */
@ -72,10 +69,10 @@ public class WhisperFullParams extends Structure {
        single_segment = single ? CBool.TRUE : CBool.FALSE;
    }

-    /** Flag to print special tokens (e.g., &lt;SOT&gt;, &lt;EOT&gt;, &lt;BEG&gt;, etc.). (default = false) */
+    /** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). (default = false) */
    public CBool print_special;

-    /** Flag to print special tokens (e.g., &lt;SOT&gt;, &lt;EOT&gt;, &lt;BEG&gt;, etc.). (default = false) */
+    /** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). (default = false) */
    public void printSpecial(boolean enable) {
        print_special = enable ? CBool.TRUE : CBool.FALSE;
    }
@ -132,14 +129,6 @@ public class WhisperFullParams extends Structure {
    /** Maximum tokens per segment (0, default = no limit) */
    public int max_tokens;

-    /** [EXPERIMENTAL] Enable debug mode for extra info */
-    public CBool debug_mode;
-
-    /** Enable debug mode */
-    public void enableDebugMode(boolean enable) {
-        debug_mode = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
    /** Overwrite the audio context size (0 = use default). */
    public int audio_ctx;

@ -285,16 +274,6 @@ public class WhisperFullParams extends Structure {
     */
    public Pointer encoder_begin_callback_user_data;

-    /** Callback used to abort GGML computation */
-    public Pointer abort_callback;
-
-    /** User data for the abort_callback */
-    public Pointer abort_callback_user_data;
-
-    public void setAbortCallback(GgmlAbortCallback callback) {
-        abort_callback = CallbackReference.getFunctionPointer(callback);
-    }
-
    /**
     * Callback by each decoder to filter obtained logits.
     * WhisperLogitsFilterCallback
@ -331,28 +310,17 @@ public class WhisperFullParams extends Structure {

    @Override
    protected List<String> getFieldOrder() {
-        return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
-                "offset_ms", "duration_ms", "translate", "no_context",
-                "no_timestamps", "single_segment", "print_special",
-                "print_progress", "print_realtime", "print_timestamps",
-                "token_timestamps", "thold_pt", "thold_ptsum", "max_len",
-                "split_on_word", "max_tokens", "debug_mode", "audio_ctx", 
-                "tdrz_enable", "suppress_regex", "initial_prompt",
-                "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
-                "suppress_blank", "suppress_nst", "temperature",
-                "max_initial_ts", "length_penalty", "temperature_inc",
-                "entropy_thold", "logprob_thold", "no_speech_thold", "greedy",
-                "beam_search", "new_segment_callback", "new_segment_callback_user_data",
+        // JNA maps fields to native memory in exactly this order, so it must follow the
+        // C struct: no_timestamps precedes single_segment (as in the list this replaces).
+        return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
+                "no_context", "no_timestamps", "single_segment",
+                "print_special", "print_progress", "print_realtime", "print_timestamps",  "token_timestamps",
+                "thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
+                "tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
+                "suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
+                "temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
+                "new_segment_callback", "new_segment_callback_user_data",
                "progress_callback", "progress_callback_user_data",
                "encoder_begin_callback", "encoder_begin_callback_user_data",
-                "abort_callback", "abort_callback_user_data",
                "logits_filter_callback", "logits_filter_callback_user_data",
                "grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
    }
-
-    public static class ByValue extends WhisperFullParams implements Structure.ByValue {
-        public ByValue() { super(); }
-        public ByValue(Pointer p) { super(p); }
-    }
-
 }
--- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
+++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
@ -76,7 +76,7 @@ class WhisperCppTest {
        float[] floats = new float[b.length / 2];

        //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-        WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
+        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
        params.print_progress = CBool.FALSE;
        //params.initial_prompt = "and so my fellow Americans um, like";
--- a/bindings/javascript/README.md
+++ b/bindings/javascript/README.md
@ -33,9 +33,6 @@ mkdir build-em && cd build-em
 emcmake cmake .. && make -j

 # run test
-node ../tests/test-whisper.js
-
-# For Node.js versions prior to v16.4.0, experimental features need to be enabled:
 node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js

 # publish npm package
--- a/bindings/ruby/ext/cpu.mk
+++ b/bindings/ruby/ext/cpu.mk
@ -1,7 +1,5 @@
 ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
 	ggml/src/ggml-cpu/ggml-cpu.cpp \
-	ggml/src/ggml-cpu/unary-ops.cpp \
-	ggml/src/ggml-cpu/binary-ops.cpp \
 	ggml/include/ggml-backend.h \
 	ggml/include/ggml.h \
 	ggml/include/ggml-alloc.h \
--- a/bindings/ruby/ext/extconf.rb
+++ b/bindings/ruby/ext/extconf.rb
@ -35,7 +35,7 @@ if $GGML_METAL
  $GGML_METAL_EMBED_LIBRARY = true
 end

-$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
+$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
 $MK_CFLAGS   = '-std=c11   -fPIC'
 $MK_CXXFLAGS = '-std=c++17 -fPIC'
 $MK_NVCCFLAGS = '-std=c++17'
@ -168,14 +168,10 @@ $OBJ_GGML <<
  'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
  'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
  'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu-traits.o' <<
-  'ggml/src/ggml-cpu/unary-ops.o' <<
-  'ggml/src/ggml-cpu/binary-ops.o'
+  'ggml/src/ggml-cpu/ggml-cpu-traits.o'

 $OBJ_WHISPER <<
-  'src/whisper.o' <<
-  'examples/common.o' <<
-  'examples/common-whisper.o'
+  'src/whisper.o'

 $objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
 $objs <<
--- a/bindings/ruby/ext/ruby_whisper_transcribe.cpp
+++ b/bindings/ruby/ext/ruby_whisper_transcribe.cpp
@ -1,6 +1,7 @@
 #include <ruby.h>
 #include "ruby_whisper.h"
-#include "common-whisper.h"
+#define DR_WAV_IMPLEMENTATION
+#include "dr_wav.h"
 #include <string>
 #include <vector>

@ -46,9 +47,84 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
  std::vector<float> pcmf32; // mono-channel F32 PCM
  std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM

-  if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
-    fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
-    return self;
+  // WAV input - adapted from the main.cpp example.
+  // Loads 16-bit PCM WAV data (from a file, or from stdin when the name is "-")
+  // into pcmf32 (mono mix) and, when diarization is requested, into pcmf32s
+  // (one vector per channel). On any validation failure an error is printed
+  // to stderr and the method returns self without transcribing.
+  {
+    drwav wav;
+    std::vector<uint8_t> wav_data; // used for pipe input from stdin
+
+    if (fname_inp == "-") {
+      {
+        uint8_t buf[1024];
+        while (true) {
+          const size_t n = fread(buf, 1, sizeof(buf), stdin);
+          if (n == 0) {
+            break;
+          }
+          wav_data.insert(wav_data.end(), buf, buf + n);
+        }
+      }
+
+      if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
+        fprintf(stderr, "error: failed to open WAV file from stdin\n");
+        return self;
+      }
+
+      fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
+    } else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
+      fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
+      return self;
+    }
+
+    // From here on the decoder is initialized, so every early return has to
+    // release it with drwav_uninit() before leaving.
+    if (wav.channels != 1 && wav.channels != 2) {
+      fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
+      drwav_uninit(&wav);
+      return self;
+    }
+
+    // The per-channel split below reads pcm16[2*i + 1], which is out of bounds
+    // for mono input, so diarization requires stereo whatever the timestamp
+    // setting is.
+    if (rwp->diarize && wav.channels != 2) {
+      fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
+      drwav_uninit(&wav);
+      return self;
+    }
+
+    if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
+      fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
+      drwav_uninit(&wav);
+      return self;
+    }
+
+    if (wav.bitsPerSample != 16) {
+      fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
+      drwav_uninit(&wav);
+      return self;
+    }
+
+    // Frame count: taken from the decoder for files, estimated from the byte
+    // count for stdin input.
+    const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
+
+    std::vector<int16_t> pcm16;
+    pcm16.resize(n*wav.channels);
+    drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
+    drwav_uninit(&wav);
+
+    // convert to mono, float
+    pcmf32.resize(n);
+    if (wav.channels == 1) {
+      for (uint64_t i = 0; i < n; i++) {
+        pcmf32[i] = float(pcm16[i])/32768.0f;
+      }
+    } else {
+      // average the two interleaved channels
+      for (uint64_t i = 0; i < n; i++) {
+        pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
+      }
+    }
+
+    if (rwp->diarize) {
+      // convert to stereo, float (input is guaranteed stereo by the check above)
+      pcmf32s.resize(2);
+
+      pcmf32s[0].resize(n);
+      pcmf32s[1].resize(n);
+      for (uint64_t i = 0; i < n; i++) {
+        pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
+        pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
+      }
+    }
+  }
  {
    static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
--- a/bindings/ruby/tests/test_callback.rb
+++ b/bindings/ruby/tests/test_callback.rb
@ -25,7 +25,7 @@ class TestCallback < TestBase
        assert start_time >= 0
        assert_kind_of Integer, end_time
        assert end_time > 0
-        assert_match(/ask not what your country can do for you, ask what you can do for your country/, text) if i_segment == 0
+        assert_match /ask not what your country can do for you, ask what you can do for your country/, text if i_segment == 0
      end
    }

@ -145,9 +145,9 @@ class TestCallback < TestBase

  def test_abort_on
    do_abort = false
-    _aborted_from_callback = false
+    aborted_from_callback = false
    @params.on_new_segment do |segment|
-      do_abort = true if segment.text.match?(/ask/)
+      do_abort = true if segment.text.match? /ask/
    end
    i = 0
    @params.abort_on do
--- a/bindings/ruby/tests/test_error.rb
+++ b/bindings/ruby/tests/test_error.rb
@ -4,7 +4,7 @@ class TestError < TestBase
  def test_error
    error = Whisper::Error.new(-2)
    assert_equal "failed to compute log mel spectrogram", error.message
-    assert_equal(-2, error.code)
+    assert_equal -2, error.code
  end

  def test_unknown_error
@ -14,7 +14,7 @@ class TestError < TestBase

  def test_non_int_code
    assert_raise TypeError do
-      _error = Whisper::Error.new("non int")
+      error = Whisper::Error.new("non int")
    end
  end
 end
--- a/bindings/ruby/tests/test_params.rb
+++ b/bindings/ruby/tests/test_params.rb
@ -162,7 +162,7 @@ class TestParams < TestBase
  end

  def test_length_penalty
-    assert_equal(-1.0, @params.length_penalty)
+    assert_equal -1.0, @params.length_penalty
    @params.length_penalty = 0.5
    assert_equal 0.5, @params.length_penalty
  end
@ -180,9 +180,9 @@ class TestParams < TestBase
  end

  def test_logprob_thold
-    assert_in_delta(-1.0, @params.logprob_thold)
+    assert_in_delta -1.0, @params.logprob_thold
    @params.logprob_thold = -0.5
-    assert_in_delta(-0.5, @params.logprob_thold)
+    assert_in_delta -0.5, @params.logprob_thold
  end

  def test_no_speech_thold
--- a/bindings/ruby/tests/test_segment.rb
+++ b/bindings/ruby/tests/test_segment.rb
@ -49,13 +49,13 @@ class TestSegment < TestBase
      if index == 0
        seg = segment
        assert_equal 0, segment.start_time
-        assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
+        assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
      end
      index += 1
    end
    whisper.transcribe(AUDIO, params)
    assert_equal 0, seg.start_time
-    assert_match(/ask not what your country can do for you, ask what you can do for your country/, seg.text)
+    assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
  end

  def test_on_new_segment_twice
--- a/bindings/ruby/tests/test_whisper.rb
+++ b/bindings/ruby/tests/test_whisper.rb
@ -16,7 +16,7 @@ class TestWhisper < TestBase
    params.print_timestamps = false

    @whisper.transcribe(AUDIO, params) {|text|
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, text
    }
  end

@ -32,7 +32,7 @@ class TestWhisper < TestBase
    def test_full_get_segment
      segment = whisper.full_get_segment(0)
      assert_equal 0, segment.start_time
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
    end

    def test_full_get_segment_t0
@ -59,7 +59,7 @@ class TestWhisper < TestBase
    end

    def test_full_get_segment_text
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0))
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
    end

    def test_full_get_segment_no_speech_prob
@ -134,14 +134,14 @@ class TestWhisper < TestBase
      @whisper.full(@params, @samples, @samples.length)

      assert_equal 1, @whisper.full_n_segments
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
    end

    def test_full_without_length
      @whisper.full(@params, @samples)

      assert_equal 1, @whisper.full_n_segments
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
    end

    def test_full_enumerator
@ -149,7 +149,7 @@ class TestWhisper < TestBase
      @whisper.full(@params, samples, @samples.length)

      assert_equal 1, @whisper.full_n_segments
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
    end

    def test_full_enumerator_without_length
@ -171,28 +171,26 @@ class TestWhisper < TestBase
      @whisper.full(@params, samples)

      assert_equal 1, @whisper.full_n_segments
-      assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
+      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
    end

    def test_full_parallel
-      nprocessors = 2
-      @whisper.full_parallel(@params, @samples, @samples.length, nprocessors)
+      @whisper.full_parallel(@params, @samples, @samples.length, Etc.nprocessors)

-      assert_equal nprocessors, @whisper.full_n_segments
+      assert_equal Etc.nprocessors, @whisper.full_n_segments
      text = @whisper.each_segment.collect(&:text).join
-      assert_match(/ask what you can do/i, text)
-      assert_match(/for your country/i, text)
+      assert_match /ask what you can do/i, text
+      assert_match /for your country/i, text
    end

    def test_full_parallel_with_memory_view
-      nprocessors = 2
      samples = JFKReader.new(AUDIO)
-      @whisper.full_parallel(@params, samples, nil, nprocessors)
+      @whisper.full_parallel(@params, samples, nil, Etc.nprocessors)

-      assert_equal nprocessors, @whisper.full_n_segments
+      assert_equal Etc.nprocessors, @whisper.full_n_segments
      text = @whisper.each_segment.collect(&:text).join
-      assert_match(/ask what you can do/i, text)
-      assert_match(/for your country/i, text)
+      assert_match /ask what you can do/i, text
+      assert_match /for your country/i, text
    end

    def test_full_parallel_without_length_and_n_processors
@ -200,18 +198,17 @@ class TestWhisper < TestBase

      assert_equal 1, @whisper.full_n_segments
      text = @whisper.each_segment.collect(&:text).join
-      assert_match(/ask what you can do/i, text)
-      assert_match(/for your country/i, text)
+      assert_match /ask what you can do/i, text
+      assert_match /for your country/i, text
    end

    def test_full_parallel_without_length
-      nprocessors = 2
-      @whisper.full_parallel(@params, @samples, nil, nprocessors)
+      @whisper.full_parallel(@params, @samples, nil, Etc.nprocessors)

-      assert_equal nprocessors, @whisper.full_n_segments
+      assert_equal Etc.nprocessors, @whisper.full_n_segments
      text = @whisper.each_segment.collect(&:text).join
-      assert_match(/ask what you can do/i, text)
-      assert_match(/for your country/i, text)
+      assert_match /ask what you can do/i, text
+      assert_match /for your country/i, text
    end

    def test_full_parallel_without_n_processors
@ -219,8 +216,8 @@ class TestWhisper < TestBase

      assert_equal 1, @whisper.full_n_segments
      text = @whisper.each_segment.collect(&:text).join
-      assert_match(/ask what you can do/i, text)
-      assert_match(/for your country/i, text)
+      assert_match /ask what you can do/i, text
+      assert_match /for your country/i, text
    end
  end
 end
--- a/build-xcframework.sh
+++ b/build-xcframework.sh
@ -1,535 +0,0 @@
-#!/bin/bash
-#
-# Options
-IOS_MIN_OS_VERSION=16.4
-MACOS_MIN_OS_VERSION=13.3
-VISIONOS_MIN_OS_VERSION=1.0
-TVOS_MIN_OS_VERSION=16.4
-
-BUILD_SHARED_LIBS=OFF
-WHISPER_BUILD_EXAMPLES=OFF
-WHISPER_BUILD_TESTS=OFF
-WHISPER_BUILD_SERVER=OFF
-GGML_METAL=ON
-GGML_METAL_EMBED_LIBRARY=ON
-GGML_BLAS_DEFAULT=ON
-GGML_METAL_USE_BF16=ON
-GGML_OPENMP=OFF
-
-COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
-COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
-
-# Common options for all builds
-COMMON_CMAKE_ARGS=(
-    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO
-    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY=""
-    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO
-    -DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym"
-    -DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES
-    -DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO
-    -DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO
-    -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-    -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
-    -DWHISPER_BUILD_EXAMPLES=${WHISPER_BUILD_EXAMPLES}
-    -DWHISPER_BUILD_TESTS=${WHISPER_BUILD_TESTS}
-    -DWHISPER_BUILD_SERVER=${WHISPER_BUILD_SERVER}
-    -DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY}
-    -DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT}
-    -DGGML_METAL=${GGML_METAL}
-    -DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16}
-    -DGGML_NATIVE=OFF
-    -DGGML_OPENMP=${GGML_OPENMP}
-)
-
-check_required_tool() {
-    local tool=$1
-    local install_message=$2
-
-    if ! command -v $tool &> /dev/null; then
-        echo "Error: $tool is required but not found."
-        echo "$install_message"
-        exit 1
-    fi
-}
-echo "Checking for required tools..."
-check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
-check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
-check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
-check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
-
-set -e
-
-## Clean up previous builds
-rm -rf build-apple
-rm -rf build-ios-sim
-rm -rf build-ios-device
-rm -rf build-macos
-rm -rf build-visionos
-rm -rf build-visionos-sim
-rm -rf build-tvos-sim
-rm -rf build-tvos-device
-
-# Setup the xcframework build directory structure
-setup_framework_structure() {
-    local build_dir=$1
-    local min_os_version=$2
-    local platform=$3  # "ios", "macos", "visionos", or "tvos"
-    local framework_name="whisper"
-
-    echo "Creating ${platform}-style framework structure for ${build_dir}"
-
-    if [[ "$platform" == "macos" ]]; then
-        # macOS versioned structure uses versioned directories
-        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers
-        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules
-        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources
-
-        # Create symbolic links
-        ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current
-        ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers
-        ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules
-        ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources
-        ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name}
-
-        # Set header and module paths
-        local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/
-        local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/
-    else
-        # iOS/VisionOS/tvOS use a flat structure
-        mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers
-        mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules
-
-        # Remove any existing structure to ensure clean build
-        rm -rf ${build_dir}/framework/${framework_name}.framework/Versions
-
-        # Set header and module paths
-        local header_path=${build_dir}/framework/${framework_name}.framework/Headers/
-        local module_path=${build_dir}/framework/${framework_name}.framework/Modules/
-    fi
-
-    # Copy all required headers (common for all platforms)
-    cp include/whisper.h           ${header_path}
-    cp ggml/include/ggml.h         ${header_path}
-    cp ggml/include/ggml-alloc.h   ${header_path}
-    cp ggml/include/ggml-backend.h ${header_path}
-    cp ggml/include/ggml-metal.h   ${header_path}
-    cp ggml/include/ggml-cpu.h     ${header_path}
-    cp ggml/include/ggml-blas.h    ${header_path}
-    cp ggml/include/gguf.h         ${header_path}
-
-    # Create module map (common for all platforms)
-    cat > ${module_path}module.modulemap << EOF
-framework module whisper {
-    header "whisper.h"
-    header "ggml.h"
-    header "ggml-alloc.h"
-    header "ggml-backend.h"
-    header "ggml-metal.h"
-    header "ggml-cpu.h"
-    header "ggml-blas.h"
-    header "gguf.h"
-
-    link "c++"
-    link framework "Accelerate"
-    link framework "Metal"
-    link framework "Foundation"
-
-    export *
-}
-EOF
-
-    # Platform-specific settings for Info.plist
-    local platform_name=""
-    local sdk_name=""
-    local supported_platform=""
-
-    case "$platform" in
-        "ios")
-            platform_name="iphoneos"
-            sdk_name="iphoneos${min_os_version}"
-            supported_platform="iPhoneOS"
-            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
-            local device_family='    <key>UIDeviceFamily</key>
-    <array>
-        <integer>1</integer>
-        <integer>2</integer>
-    </array>'
-            ;;
-        "macos")
-            platform_name="macosx"
-            sdk_name="macosx${min_os_version}"
-            supported_platform="MacOSX"
-            local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist"
-            local device_family=""
-            ;;
-        "visionos")
-            platform_name="xros"
-            sdk_name="xros${min_os_version}"
-            supported_platform="XRPlatform"
-            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
-            local device_family=""
-            ;;
-        "tvos")
-            platform_name="appletvos"
-            sdk_name="appletvos${min_os_version}"
-            supported_platform="AppleTVOS"
-            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
-            local device_family='    <key>UIDeviceFamily</key>
-    <array>
-        <integer>3</integer>
-    </array>'
-            ;;
-    esac
-
-    # Create Info.plist
-    cat > ${plist_path} << EOF
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-    <key>CFBundleDevelopmentRegion</key>
-    <string>en</string>
-    <key>CFBundleExecutable</key>
-    <string>whisper</string>
-    <key>CFBundleIdentifier</key>
-    <string>org.ggml.whisper</string>
-    <key>CFBundleInfoDictionaryVersion</key>
-    <string>6.0</string>
-    <key>CFBundleName</key>
-    <string>whisper</string>
-    <key>CFBundlePackageType</key>
-    <string>FMWK</string>
-    <key>CFBundleShortVersionString</key>
-    <string>1.0</string>
-    <key>CFBundleVersion</key>
-    <string>1</string>
-    <key>MinimumOSVersion</key>
-    <string>${min_os_version}</string>
-    <key>CFBundleSupportedPlatforms</key>
-    <array>
-        <string>${supported_platform}</string>
-    </array>${device_family}
-    <key>DTPlatformName</key>
-    <string>${platform_name}</string>
-    <key>DTSDKName</key>
-    <string>${sdk_name}</string>
-</dict>
-</plist>
-EOF
-}
-
-# Create dynamic libraries from static libraries.
-combine_static_libraries() {
-    local build_dir="$1"
-    local release_dir="$2"
-    local platform="$3"  # "ios", "macos", "visionos", or "tvos"
-    local is_simulator="$4"
-    local base_dir="$(pwd)"
-    local framework_name="whisper"
-
-    # Determine output path based on platform
-    local output_lib=""
-    if [[ "$platform" == "macos" ]]; then
-        # macOS uses versioned structure
-        output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}"
-    else
-        # iOS, visionOS, and tvOS use a directory flat structure
-        output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}"
-    fi
-
-    local libs=(
-        "${base_dir}/${build_dir}/src/${release_dir}/libwhisper.a"
-        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
-        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
-        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
-        "${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
-        "${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
-    )
-    if [[ "$platform" == "macos" || "$platform" == "ios" ]]; then
-        echo "Adding libwhisper.coreml library to the build."
-        libs+=(
-            "${base_dir}/${build_dir}/src/${release_dir}/libwhisper.coreml.a"
-        )
-    fi
-
-    # Create temporary directory for processing
-    local temp_dir="${base_dir}/${build_dir}/temp"
-    echo "Creating temporary directory: ${temp_dir}"
-    mkdir -p "${temp_dir}"
-
-    # Since we have multiple architectures libtool will find object files that do not
-    # match the target architecture. We suppress these warnings.
-    libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null
-
-    # Determine SDK, architectures, and install_name based on platform and simulator flag.
-    local sdk=""
-    local archs=""
-    local min_version_flag=""
-    local install_name=""
-    local frameworks="-framework Foundation -framework Metal -framework Accelerate"
-
-    case "$platform" in
-        "ios")
-            if [[ "$is_simulator" == "true" ]]; then
-                sdk="iphonesimulator"
-                archs="arm64 x86_64"
-                min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}"
-            else
-                sdk="iphoneos"
-                archs="arm64"
-                min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}"
-            fi
-            install_name="@rpath/whisper.framework/whisper"
-            frameworks+=" -framework CoreML"
-            ;;
-        "macos")
-            sdk="macosx"
-            archs="arm64 x86_64"
-            min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}"
-            install_name="@rpath/whisper.framework/Versions/Current/whisper"
-            frameworks+=" -framework CoreML"
-            ;;
-        "visionos")
-            if [[ "$is_simulator" == "true" ]]; then
-                sdk="xrsimulator"
-                archs="arm64 x86_64"
-                min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator"
-            else
-                sdk="xros"
-                archs="arm64"
-                min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}"
-            fi
-            # Use flat structure for visionOS, same as iOS
-            install_name="@rpath/whisper.framework/whisper"
-            ;;
-        "tvos")
-            if [[ "$is_simulator" == "true" ]]; then
-                sdk="appletvsimulator"
-                archs="arm64 x86_64"
-                min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}"
-            else
-                sdk="appletvos"
-                archs="arm64"
-                min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}"
-            fi
-            install_name="@rpath/whisper.framework/whisper"
-            ;;
-    esac
-
-    # Build architecture flags
-    local arch_flags=""
-    for arch in $archs; do
-        arch_flags+=" -arch $arch"
-    done
-
-    # Create dynamic library
-    echo "Creating dynamic library for ${platform}."
-    xcrun -sdk $sdk clang++ -dynamiclib \
-        -isysroot $(xcrun --sdk $sdk --show-sdk-path) \
-        $arch_flags \
-        $min_version_flag \
-        -Wl,-force_load,"${temp_dir}/combined.a" \
-        $frameworks \
-        -install_name "$install_name" \
-        -o "${base_dir}/${output_lib}"
-
-    # Platform-specific post-processing for device builds
-    if [[ "$is_simulator" == "false" ]]; then
-        if command -v vtool &>/dev/null; then
-            case "$platform" in
-                "ios")
-                    echo "Marking binary as a framework binary for iOS..."
-                    vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
-                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
-                    ;;
-                "visionos")
-                    echo "Marking binary as a framework binary for visionOS..."
-                    vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
-                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
-                    ;;
-                "tvos")
-                    echo "Marking binary as a framework binary for tvOS..."
-                    vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
-                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
-                    ;;
-            esac
-        else
-            echo "Warning: vtool not found. Binary may not pass App Store validation."
-        fi
-    fi
-
-    echo "Creating properly formatted dSYM..."
-    # Create a separate directory for dSYMs for all platforms
-    mkdir -p "${base_dir}/${build_dir}/dSYMs"
-
-    # iOS and visionOS style dSYM (flat structure)
-    if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
-        # Generate dSYM in the dSYMs directory
-        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
-
-        # Create a copy of the binary that will be stripped
-        cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"
-
-        # Strip debug symbols from the copy
-        xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"
-
-        # Replace the original with the stripped version
-        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
-    else
-        # macOS style dSYM
-        # First strip debug info to a separate file
-        xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"
-
-        # Generate dSYM in the dSYMs directory
-        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
-
-        # Replace original binary with stripped version
-        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
-    fi
-
-    # Remove any automatically generated dSYM files in the framework structure as they will
-    # otherwise case Invalid Bundle Structure validation errors.
-    if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
-        echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
-        rm -rf "${base_dir}/${output_lib}.dSYM"
-    fi
-
-    # Clean up
-    rm -rf "${temp_dir}"
-}
-
-echo "Building for iOS simulator..."
-cmake -B build-ios-sim -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
-    -DIOS=ON \
-    -DCMAKE_SYSTEM_NAME=iOS \
-    -DCMAKE_OSX_SYSROOT=iphonesimulator \
-    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
-    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-    -DWHISPER_COREML="ON" \
-    -DWHISPER_COREML_ALLOW_FALLBACK="ON" \
-    -S .
-cmake --build build-ios-sim --config Release -- -quiet
-
-echo "Building for iOS devices..."
-cmake -B build-ios-device -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
-    -DCMAKE_OSX_SYSROOT=iphoneos \
-    -DCMAKE_OSX_ARCHITECTURES="arm64" \
-    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
-    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-    -DWHISPER_COREML="ON" \
-    -DWHISPER_COREML_ALLOW_FALLBACK="ON" \
-    -S .
-cmake --build build-ios-device --config Release -- -quiet
-
-echo "Building for macOS..."
-cmake -B build-macos -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
-    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-    -DWHISPER_COREML="ON" \
-    -DWHISPER_COREML_ALLOW_FALLBACK="ON" \
-    -S .
-cmake --build build-macos --config Release -- -quiet
-
-echo "Building for visionOS..."
-cmake -B build-visionos -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
-    -DCMAKE_OSX_ARCHITECTURES="arm64" \
-    -DCMAKE_SYSTEM_NAME=visionOS \
-    -DCMAKE_OSX_SYSROOT=xros \
-    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-    -S .
-cmake --build build-visionos --config Release -- -quiet
-
-echo "Building for visionOS simulator..."
-cmake -B build-visionos-sim -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
-    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-    -DCMAKE_SYSTEM_NAME=visionOS \
-    -DCMAKE_OSX_SYSROOT=xrsimulator \
-    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-    -S .
-cmake --build build-visionos-sim --config Release -- -quiet
-
-# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS)
-echo "Building for tvOS simulator..."
-cmake -B build-tvos-sim -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
-    -DCMAKE_SYSTEM_NAME=tvOS \
-    -DCMAKE_OSX_SYSROOT=appletvsimulator \
-    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-    -DGGML_METAL=ON \
-    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
-    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-    -S .
-cmake --build build-tvos-sim --config Release -- -quiet
-
-echo "Building for tvOS devices..."
-cmake -B build-tvos-device -G Xcode \
-    "${COMMON_CMAKE_ARGS[@]}" \
-    -DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
-    -DCMAKE_SYSTEM_NAME=tvOS \
-    -DCMAKE_OSX_SYSROOT=appletvos \
-    -DCMAKE_OSX_ARCHITECTURES="arm64" \
-    -DGGML_METAL=ON \
-    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
-    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-    -S .
-cmake --build build-tvos-device --config Release -- -quiet
-
-# Setup frameworks and copy binaries and headers
-echo "Setting up framework structures..."
-setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios"
-setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios"
-setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos"
-setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos"
-setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos"
-setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos"
-setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos"
-
-# Create dynamic libraries from static libraries
-echo "Creating dynamic libraries from static libraries..."
-combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true"
-combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false"
-combine_static_libraries "build-macos" "Release" "macos" "false"
-combine_static_libraries "build-visionos" "Release-xros" "visionos" "false"
-combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true"
-combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true"
-combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
-
-# Create XCFramework with correct debug symbols paths
-echo "Creating XCFramework..."
-xcodebuild -create-xcframework \
-    -framework $(pwd)/build-ios-sim/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-ios-sim/dSYMs/whisper.dSYM \
-    -framework $(pwd)/build-ios-device/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-ios-device/dSYMs/whisper.dSYM \
-    -framework $(pwd)/build-macos/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-macos/dSYMS/whisper.dSYM \
-    -framework $(pwd)/build-visionos/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-visionos/dSYMs/whisper.dSYM \
-    -framework $(pwd)/build-visionos-sim/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-visionos-sim/dSYMs/whisper.dSYM \
-    -framework $(pwd)/build-tvos-device/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-tvos-device/dSYMs/whisper.dSYM \
-    -framework $(pwd)/build-tvos-sim/framework/whisper.framework \
-    -debug-symbols $(pwd)/build-tvos-sim/dSYMs/whisper.dSYM \
-    -output $(pwd)/build-apple/whisper.xcframework
--- a/ci/README.md
+++ b/ci/README.md
@ -1,41 +0,0 @@
-# CI
-
-In addition to [Github Actions](https://github.com/ggerganov/whisper.cpp/actions) `whisper.cpp` uses a custom CI framework:
-
-https://github.com/ggml-org/ci
-
-It monitors the `master` branch for new commits and runs the
-[ci/run.sh](https://github.com/ggerganov/whisper.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
-to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
-to cover various hardware architectures, including GPU and Apple Silicon instances.
-
-Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
-Only the branches of this repo are monitored for this keyword.
-
-It is a good practice, before publishing changes to execute the full CI locally on your machine:
-
-```bash
-mkdir tmp
-
-# CPU-only build
-bash ./ci/run.sh ./tmp/results ./tmp/mnt
-
-# with CUDA support
-GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
-```
-
-## Environment Variables
-
-The CI script supports several environment variables to control the build:
-
-| Variable | Description |
-|----------|-------------|
-| `GG_BUILD_CUDA` | Enable NVIDIA CUDA GPU acceleration |
-| `GG_BUILD_SYCL` | Enable Intel SYCL acceleration |
-| `GG_BUILD_VULKAN` | Enable Vulkan GPU acceleration |
-| `GG_BUILD_METAL` | Enable Metal acceleration on Apple Silicon |
-| `GG_BUILD_BLAS` | Enable BLAS CPU acceleration |
-| `GG_BUILD_OPENVINO` | Enable OpenVINO support |
-| `GG_BUILD_COREML` | Enable Core ML support for Apple Neural Engine |
-| `GG_BUILD_LOW_PERF` | Limit tests for low-performance hardware |
-| `GG_BUILD_TEST_MODELS` | Comma-separated list of models to test (e.g. "tiny.en,tiny,base,medium", defaults to all models unless `GG_BUILD_LOW_PERF` is set) |
--- a/ci/run.sh
+++ b/ci/run.sh
@ -1,336 +0,0 @@
-#!/bin/bash
-#
-# sample usage:
-#
-# mkdir tmp
-#
-# # CPU-only build
-# bash ./ci/run.sh ./tmp/results ./tmp/mnt
-#
-# # with CUDA support
-# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
-#
-# # with SYCL support
-# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
-
-if [ -z "$2" ]; then
-    echo "usage: $0 <output-dir> <mnt-dir>"
-    exit 1
-fi
-
-mkdir -p "$1"
-mkdir -p "$2"
-
-OUT=$(realpath "$1")
-MNT=$(realpath "$2")
-
-rm -f "$OUT/*.log"
-rm -f "$OUT/*.exit"
-rm -f "$OUT/*.md"
-
-sd=`dirname $0`
-cd $sd/../
-SRC=`pwd`
-
-ALL_MODELS=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
-BENCH_N_THREADS=4
-BENCH_ENCODER_ONLY=0
-BENCH_FLASH_ATTN=0
-
-# check for user-specified models first. if not specified, use fast models
-if [ ! -z ${GG_BUILD_TEST_MODELS} ]; then
-    IFS=',' read -r -a MODELS <<< "${GG_BUILD_TEST_MODELS}"
-else
-    if [ ! -z ${GG_BUILD_LOW_PERF} ]; then
-        MODELS=( "tiny" "base" "small" )
-    else
-        MODELS=("${ALL_MODELS[@]}")
-    fi
-fi
-
-CMAKE_EXTRA="-DWHISPER_FATAL_WARNINGS=ON"
-
-if [ ! -z ${GG_BUILD_CUDA} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
-fi
-
-if [ ! -z ${GG_BUILD_SYCL} ]; then
-    if [ -z ${ONEAPI_ROOT} ]; then
-        echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
-        echo "source /opt/intel/oneapi/setvars.sh"
-        exit 1
-    fi
-
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
-fi
-
-if [ ! -z ${GG_BUILD_OPENVINO} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_OPENVINO=ON"
-fi
-
-if [ ! -z ${GG_BUILD_METAL} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
-fi
-
-if [ ! -z ${GG_BUILD_VULKAN} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=ON"
-fi
-
-if [ ! -z ${GG_BUILD_BLAS} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_BLAS=ON"
-fi
-
-if [ ! -z ${GG_BUILD_COREML} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_COREML=ON"
-fi
-
-## helpers
-
-# download a file if it does not exist or if it is outdated
-function gg_wget {
-    local out=$1
-    local url=$2
-
-    local cwd=`pwd`
-
-    mkdir -p $out
-    cd $out
-
-    # should not re-download if file is the same
-    wget -nv -N $url
-
-    cd $cwd
-}
-
-function gg_download_model {
-    local model_name=$1
-    local model_file="$MNT/models/ggml-${model_name}.bin"
-
-    if [ ! -f ${model_file} ]; then
-        local cwd=`pwd`
-        mkdir -p "$MNT/models"
-        cd "$MNT/models"
-        bash "$cwd/models/download-ggml-model.sh" ${model_name} .
-        cd "$cwd"
-    fi
-}
-
-function gg_printf {
-    printf -- "$@" >> $OUT/README.md
-}
-
-# Helper function to check command exit status
-function gg_check_last_command_status {
-    local exit_file=$1
-    local command_name=$2
-
-    local exit_status=$?
-    echo "$exit_status" > "$exit_file"
-
-    if [ $exit_status -ne 0 ]; then
-        echo "Error: Command $command_name failed with exit status $exit_status"
-        return 1
-    fi
-
-    return 0
-}
-
-# Usage: gg_run <test_name> [additional_args...]
-#
-# Parameters:
-#   test_name       - Name of the test to run (calls gg_run_<test_name>)
-#   additional_args - Any additional arguments to pass to the test function (first argument is appended to the log filename)
-function gg_run {
-    ci=$1
-
-    if [ $# -gt 1 ]; then
-        ci="${ci}_${2}"
-    fi
-
-    set -o pipefail
-    set -x
-
-    gg_run_$1 "$@" | tee $OUT/$ci.log
-    cur=$?
-    echo "$cur" > $OUT/$ci.exit
-
-    set +x
-    set +o pipefail
-
-    gg_sum_$1 "$@"
-
-    ret=$((ret | cur))
-}
-
-function gg_check_build_requirements {
-    if ! command -v cmake &> /dev/null; then
-        gg_printf 'cmake not found, please install'
-    fi
-
-    if ! command -v make &> /dev/null; then
-        gg_printf 'make not found, please install'
-    fi
-}
-
-## ci
-
-function gg_run_ctest {
-    mode=$2
-
-    cd ${SRC}
-    
-    rm -rf build-ci-${mode} && mkdir build-ci-${mode} && cd build-ci-${mode}
-
-    set -e
-
-    gg_check_build_requirements
-
-    (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j$(nproc)                                    ) 2>&1 | tee -a $OUT/${ci}-make.log
-
-    (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
-
-    set +e
-}
-
-function gg_sum_ctest {
-    mode=$2
-
-    gg_printf '### %s\n\n' "${ci}"
-
-    gg_printf 'Runs ctest in '${mode}' mode\n'
-    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
-    gg_printf '```\n'
-    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
-    gg_printf '```\n'
-}
-
-function gg_run_bench {
-    cd ${SRC}
-
-    # set flash attention flag if enabled
-    fattn=""
-    if [ "$BENCH_FLASH_ATTN" -eq 1 ]; then
-        fattn="-fa"
-    fi
-
-    # run memcpy benchmark if not encoder-only mode
-    if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
-        echo "Running memcpy benchmark"
-        (time ./build-ci-release/bin/whisper-bench -w 1 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-memcpy.log
-        gg_check_last_command_status "$OUT/${ci}-memcpy.exit" "memcpy benchmark"
-        
-        echo "Running ggml_mul_mat benchmark with $BENCH_N_THREADS threads"
-        (time ./build-ci-release/bin/whisper-bench -w 2 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-mul_mat.log
-        gg_check_last_command_status "$OUT/${ci}-mul_mat.exit" "ggml_mul_mat benchmark"
-    fi
-
-    echo "Running benchmark for all models"
-
-    # generate header for the benchmark table
-    {
-        printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "Config" "Model" "Th" "FA" "Enc." "Dec." "Bch5" "PP" "Commit"
-        printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---"
-    } | tee -a $OUT/${ci}-models-table.log
-
-    # run benchmark for each model
-    for model in "${MODELS[@]}"; do
-        echo "Benchmarking model: $model"
-
-        # run the benchmark and capture output
-        output=$(./build-ci-release/bin/whisper-bench -m $MNT/models/ggml-$model.bin -t $BENCH_N_THREADS $fattn 2>&1)
-        ret=$?
-
-        # save the raw output
-        echo "$output" > $OUT/${ci}-bench-$model.log
-
-        if [ $ret -eq 0 ]; then
-            # parse the benchmark results
-            encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
-            decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
-            batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
-            prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
-            system_info=$(echo "$output" | grep "system_info")
-            actual_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
-
-            # determine configuration
-            config=""
-            if [[ $system_info == *"AVX2 = 1"* ]]; then
-                config="$config AVX2"
-            fi
-            if [[ $system_info == *"NEON = 1"* ]]; then
-                config="$config NEON"
-            fi
-            if [[ $system_info == *"BLAS = 1"* ]]; then
-                config="$config BLAS"
-            fi
-            if [[ $system_info == *"COREML = 1"* ]]; then
-                config="$config COREML"
-            fi
-            if [[ $system_info == *"CUDA = 1"* ]]; then
-                config="$config CUDA"
-            fi
-            if [[ $system_info == *"METAL = 1"* ]]; then
-                config="$config METAL"
-            fi
-
-            # get commit hash
-            commit=$(git rev-parse --short HEAD)
-
-            # add row to benchmark table
-            printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" \
-                "$config" "$model" "$actual_threads" "$BENCH_FLASH_ATTN" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit" \
-                | tee -a $OUT/${ci}-models-table.log
-        else
-            echo "Benchmark failed for model: $model" | tee -a $OUT/${ci}-bench-errors.log
-        fi
-    done
-}
-
-function gg_sum_bench {
-    gg_printf '### %s\n\n' "${ci}"
-
-    gg_printf 'Whisper Benchmark Results\n'
-    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
-
-    # show memcpy and ggml_mul_mat benchmark results if available
-    if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
-        if [ -f "$OUT/${ci}-memcpy.log" ]; then
-            gg_printf '#### memcpy Benchmark\n\n'
-            gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-memcpy.log)"
-        fi
-
-        if [ -f "$OUT/${ci}-mul_mat.log" ]; then
-            gg_printf '#### ggml_mul_mat Benchmark\n\n'
-            gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-mul_mat.log)"
-        fi
-    fi
-
-    # show model benchmark results
-    gg_printf '#### Model Benchmarks\n\n'
-    if [ -f "$OUT/${ci}-models-table.log" ]; then
-        gg_printf '%s\n\n' "$(cat $OUT/${ci}-models-table.log)"
-    else
-        gg_printf 'No model benchmark results available.\n\n'
-    fi
-
-    # show any errors that occurred
-    if [ -f "$OUT/${ci}-bench-errors.log" ]; then
-        gg_printf '#### Benchmark Errors\n\n'
-        gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-bench-errors.log)"
-    fi
-}
-
-ret=0
-
-for model in "${MODELS[@]}"; do
-    test $ret -eq 0 && gg_download_model ${model}
-done
-if [ -z ${GG_BUILD_SYCL}]; then
-    test $ret -eq 0 && gg_run ctest debug
-fi
-test $ret -eq 0 && gg_run ctest release
-
-test $ret -eq 0 && gg_run bench
-
-exit $ret
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -14,6 +14,10 @@ if (WHISPER_SDL2)
    message(STATUS "SDL2_LIBRARIES    = ${SDL2_LIBRARIES}")
 endif()

+if (WHISPER_CLBLAST)
+    find_package(CLBlast REQUIRED)
+endif()
+
 # common

 set(TARGET common)
@ -52,8 +56,6 @@ add_library(${TARGET} STATIC
    common.cpp
    common-ggml.h
    common-ggml.cpp
-    common-whisper.h
-    common-whisper.cpp
    grammar-parser.h
    grammar-parser.cpp
    ${COMMON_SOURCES_FFMPEG}
@ -61,7 +63,7 @@ add_library(${TARGET} STATIC

 include(DefaultTargetOptions)

-target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS} ${CMAKE_DL_LIBS})
+target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS})

 set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
--- a/examples/addon.node/test/whisper.spec.js
+++ b/examples/addon.node/test/whisper.spec.js
@ -18,7 +18,6 @@ const whisperParamsMock = {
  translate: true,
  no_timestamps: false,
  audio_ctx: 0,
-  max_len: 0,
 };

 describe("Run whisper.node", () => {
--- a/examples/addon.node/addon.cpp
+++ b/examples/addon.node/addon.cpp
@ -1,6 +1,5 @@
 #include "napi.h"
 #include "common.h"
-#include "common-whisper.h"

 #include "whisper.h"

@ -128,227 +127,192 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper

 void cb_log_disable(enum ggml_log_level, const char *, void *) {}

-class ProgressWorker : public Napi::AsyncWorker {
- public:
-    ProgressWorker(Napi::Function& callback, whisper_params params, Napi::Function progress_callback, Napi::Env env)
-        : Napi::AsyncWorker(callback), params(params), env(env) {
-        // Create thread-safe function
-        if (!progress_callback.IsEmpty()) {
-            tsfn = Napi::ThreadSafeFunction::New(
-                env,
-                progress_callback,
-                "Progress Callback",
-                0,
-                1
-            );
-        }
+// Transcribes the input described by `params` and stores the segments in `result`.
+//
+// Input is the in-memory buffer `params.pcmf32` when it is non-empty, otherwise
+// every WAV file listed in `params.fname_inp`; files that cannot be read are
+// reported on stderr and skipped. After the loop, `result` is resized to the
+// segment count reported by the context and every row receives the start
+// timestamp, the end timestamp and the segment text, in that order.
+//
+// Returns 0 on success, 2 when no input was given, 3 when the whisper context
+// could not be created, 4 for an unknown language and 10 when inference fails.
+int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
+    if (params.no_prints) {
+        whisper_log_set(cb_log_disable, NULL);
    }

-    ~ProgressWorker() {
-        if (tsfn) {
-            // Make sure to release the thread-safe function on destruction
-            tsfn.Release();
-        }
+    if (params.fname_inp.empty() && params.pcmf32.empty()) {
+        fprintf(stderr, "error: no input files or audio buffer specified\n");
+        return 2;
    }

-    void Execute() override {
-        // Use custom run function with progress callback support
-        run_with_progress(params, result);
+    if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
+        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
+        // report the failure to the caller instead of terminating the host process
+        return 4;
    }

-    void OnOK() override {
-        Napi::HandleScope scope(Env());
-        Napi::Object res = Napi::Array::New(Env(), result.size());
-        for (uint64_t i = 0; i < result.size(); ++i) {
-            Napi::Object tmp = Napi::Array::New(Env(), 3);
-            for (uint64_t j = 0; j < 3; ++j) {
-                tmp[j] = Napi::String::New(Env(), result[i][j]);
+    // whisper init
+
+    struct whisper_context_params cparams = whisper_context_default_params();
+    cparams.use_gpu = params.use_gpu;
+    cparams.flash_attn = params.flash_attn;
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+
+    if (ctx == nullptr) {
+        fprintf(stderr, "error: failed to initialize whisper context\n");
+        return 3;
+    }
+
+    // if params.pcmf32 is provided, set params.fname_inp to "buffer"
+    // this is simpler than further modifications in the code
+    if (!params.pcmf32.empty()) {
+        fprintf(stderr, "info: using audio buffer as input\n");
+        params.fname_inp.clear();
+        params.fname_inp.emplace_back("buffer");
+    }
+
+    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
+        const auto fname_inp = params.fname_inp[f];
+        const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
+
+        std::vector<float> pcmf32; // mono-channel F32 PCM
+        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
+
+        // read the input audio file if params.pcmf32 is not provided
+        if (params.pcmf32.empty()) {
+            if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
+                fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
+                continue;
+            }
+        } else {
+            pcmf32 = params.pcmf32;
+        }
+
+        // print system information
+        if (!params.no_prints) {
+            fprintf(stderr, "\n");
+            fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
+                    params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
+        }
+
+        // print some info about the processing
+        // NOTE(review): the language/translate override below only runs when printing
+        // is enabled (it sits inside the no_prints check) - confirm that is intended
+        if (!params.no_prints) {
+            fprintf(stderr, "\n");
+            if (!whisper_is_multilingual(ctx)) {
+                if (params.language != "en" || params.translate) {
+                    params.language = "en";
+                    params.translate = false;
+                    fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
+                }
+            }
+            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d, audio_ctx = %d ...\n",
+                    __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
+                    params.n_threads, params.n_processors,
+                    params.language.c_str(),
+                    params.translate ? "translate" : "transcribe",
+                    params.no_timestamps ? 0 : 1,
+                    params.audio_ctx);
+
+            fprintf(stderr, "\n");
+        }
+
+        // run the inference
+        {
+            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+
+            wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
+
+            wparams.print_realtime   = false;
+            wparams.print_progress   = params.print_progress;
+            wparams.print_timestamps = !params.no_timestamps;
+            wparams.print_special    = params.print_special;
+            wparams.translate        = params.translate;
+            wparams.language         = params.language.c_str();
+            wparams.n_threads        = params.n_threads;
+            wparams.n_max_text_ctx   = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
+            wparams.offset_ms        = params.offset_t_ms;
+            wparams.duration_ms      = params.duration_ms;
+
+            wparams.token_timestamps = params.output_wts || params.max_len > 0;
+            wparams.thold_pt         = params.word_thold;
+            wparams.entropy_thold    = params.entropy_thold;
+            wparams.logprob_thold    = params.logprob_thold;
+            wparams.max_len          = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
+            wparams.audio_ctx        = params.audio_ctx;
+
+            wparams.greedy.best_of        = params.best_of;
+            wparams.beam_search.beam_size = params.beam_size;
+
+            wparams.initial_prompt   = params.prompt.c_str();
+
+            wparams.no_timestamps    = params.no_timestamps;
+
+            whisper_print_user_data user_data = { &params, &pcmf32s };
+
+            // this callback is called on each new segment
+            if (!wparams.print_realtime) {
+                wparams.new_segment_callback           = whisper_print_segment_callback;
+                wparams.new_segment_callback_user_data = &user_data;
+            }
+
+            // example for abort mechanism
+            // in this example, we do not abort the processing, but we could if the flag is set to true
+            // the callback is called before every encoder run - if it returns false, the processing is aborted
+            {
+                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
+
+                wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
+                    bool is_aborted = *(bool*)user_data;
+                    return !is_aborted;
+                };
+                wparams.encoder_begin_callback_user_data = &is_aborted;
+            }
+
+            if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
+                fprintf(stderr, "failed to process audio\n");
+                // release the context on this early exit so it is not leaked
+                whisper_free(ctx);
+                return 10;
            }
-            res[i] = tmp;
        }
-        Callback().Call({Env().Null(), res});
    }

-    // Progress callback function - using thread-safe function
-    void OnProgress(int progress) {
-        if (tsfn) {
-            // Use thread-safe function to call JavaScript callback
-            auto callback = [progress](Napi::Env env, Napi::Function jsCallback) {
-                jsCallback.Call({Napi::Number::New(env, progress)});
-            };
-            
-            tsfn.BlockingCall(callback);
-        }
+    // collect {start, end, text} for every segment the context reports
+    const int n_segments = whisper_full_n_segments(ctx);
+    result.resize(n_segments);
+    for (int i = 0; i < n_segments; ++i) {
+        const char * text = whisper_full_get_segment_text(ctx, i);
+        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+
+        result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
+        result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
+        result[i].emplace_back(text);
    }

+    whisper_print_timings(ctx);
+    whisper_free(ctx);
+
+    return 0;
+}
+
+class Worker : public Napi::AsyncWorker {
+ public:
+  // Keeps a private copy of `params`; `callback` is forwarded to the AsyncWorker base.
+  Worker(Napi::Function& callback, whisper_params params)
+      : Napi::AsyncWorker(callback),
+        params(params) {
+  }
+
+  // Runs the transcription. A non-zero code from run() is recorded as the
+  // worker's error so the failure reaches the JavaScript callback instead of
+  // being reported as a successful, empty result.
+  void Execute() override {
+    const int rc = run(params, result);
+    if (rc != 0) {
+      SetError("whisper: transcription failed with code " + std::to_string(rc));
+    }
+  }
+
+  // Converts `result` into a JavaScript array of 3-element string arrays and
+  // passes it to the callback as (null, rows).
+  void OnOK() override {
+    Napi::Env env = Env();
+    Napi::HandleScope scope(env);
+
+    Napi::Object rows = Napi::Array::New(env, result.size());
+    for (uint64_t row = 0; row < result.size(); ++row) {
+      const std::vector<std::string> & fields = result[row];
+
+      // each row carries exactly three strings
+      Napi::Object entry = Napi::Array::New(env, 3);
+      for (uint64_t col = 0; col < 3; ++col) {
+        entry[col] = Napi::String::New(env, fields[col]);
+      }
+      rows[row] = entry;
+    }
+
+    Callback().Call({env.Null(), rows});
+  }
+
 private:
-    whisper_params params;
-    std::vector<std::vector<std::string>> result;
-    Napi::Env env;
-    Napi::ThreadSafeFunction tsfn;
-
-    // Custom run function with progress callback support
-    int run_with_progress(whisper_params &params, std::vector<std::vector<std::string>> &result) {
-        if (params.no_prints) {
-            whisper_log_set(cb_log_disable, NULL);
-        }
-
-        if (params.fname_inp.empty() && params.pcmf32.empty()) {
-            fprintf(stderr, "error: no input files or audio buffer specified\n");
-            return 2;
-        }
-
-        if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
-            fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
-            exit(0);
-        }
-
-        // whisper init
-        struct whisper_context_params cparams = whisper_context_default_params();
-        cparams.use_gpu = params.use_gpu;
-        cparams.flash_attn = params.flash_attn;
-        struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
-
-        if (ctx == nullptr) {
-            fprintf(stderr, "error: failed to initialize whisper context\n");
-            return 3;
-        }
-
-        // If params.pcmf32 provides, set params.fname_inp as "buffer"
-        if (!params.pcmf32.empty()) {
-            fprintf(stderr, "info: using audio buffer as input\n");
-            params.fname_inp.clear();
-            params.fname_inp.emplace_back("buffer");
-        }
-
-        for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
-            const auto fname_inp = params.fname_inp[f];
-            const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
-
-            std::vector<float> pcmf32; // mono-channel F32 PCM
-            std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
-
-            // If params.pcmf32 is empty, read input audio file
-            if (params.pcmf32.empty()) {
-                if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
-                    fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
-                    continue;
-                }
-            } else {
-                pcmf32 = params.pcmf32;
-            }
-
-            // Print system info
-            if (!params.no_prints) {
-                fprintf(stderr, "\n");
-                fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                        params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
-            }
-
-            // Print processing info
-            if (!params.no_prints) {
-                fprintf(stderr, "\n");
-                if (!whisper_is_multilingual(ctx)) {
-                    if (params.language != "en" || params.translate) {
-                        params.language = "en";
-                        params.translate = false;
-                        fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
-                    }
-                }
-                fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d, audio_ctx = %d ...\n",
-                        __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
-                        params.n_threads, params.n_processors,
-                        params.language.c_str(),
-                        params.translate ? "translate" : "transcribe",
-                        params.no_timestamps ? 0 : 1,
-                        params.audio_ctx);
-
-                fprintf(stderr, "\n");
-            }
-
-            // Run inference
-            {
-                whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-                wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
-
-                wparams.print_realtime   = false;
-                wparams.print_progress   = params.print_progress;
-                wparams.print_timestamps = !params.no_timestamps;
-                wparams.print_special    = params.print_special;
-                wparams.translate        = params.translate;
-                wparams.language         = params.language.c_str();
-                wparams.n_threads        = params.n_threads;
-                wparams.n_max_text_ctx   = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
-                wparams.offset_ms        = params.offset_t_ms;
-                wparams.duration_ms      = params.duration_ms;
-
-                wparams.token_timestamps = params.output_wts || params.max_len > 0;
-                wparams.thold_pt         = params.word_thold;
-                wparams.entropy_thold    = params.entropy_thold;
-                wparams.logprob_thold    = params.logprob_thold;
-                wparams.max_len          = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
-                wparams.audio_ctx        = params.audio_ctx;
-
-                wparams.greedy.best_of        = params.best_of;
-                wparams.beam_search.beam_size = params.beam_size;
-
-                wparams.initial_prompt   = params.prompt.c_str();
-
-                wparams.no_timestamps    = params.no_timestamps;
-
-                whisper_print_user_data user_data = { &params, &pcmf32s };
-
-                // This callback is called for each new segment
-                if (!wparams.print_realtime) {
-                    wparams.new_segment_callback           = whisper_print_segment_callback;
-                    wparams.new_segment_callback_user_data = &user_data;
-                }
-
-                // Set progress callback
-                wparams.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
-                    ProgressWorker* worker = static_cast<ProgressWorker*>(user_data);
-                    worker->OnProgress(progress);
-                };
-                wparams.progress_callback_user_data = this;
-
-                // Abort mechanism example
-                {
-                    static bool is_aborted = false; // Note: this should be atomic to avoid data races
-
-                    wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
-                        bool is_aborted = *(bool*)user_data;
-                        return !is_aborted;
-                    };
-                    wparams.encoder_begin_callback_user_data = &is_aborted;
-                }
-
-                if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
-                    fprintf(stderr, "failed to process audio\n");
-                    return 10;
-                }
-            }
-    }
-
-        const int n_segments = whisper_full_n_segments(ctx);
-        result.resize(n_segments);
-        for (int i = 0; i < n_segments; ++i) {
-            const char * text = whisper_full_get_segment_text(ctx, i);
-            const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-            const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-            result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
-            result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
-            result[i].emplace_back(text);
-        }
-
-        whisper_print_timings(ctx);
-        whisper_free(ctx);
-
-        return 0;
-    }
+  whisper_params params;
+  std::vector<std::vector<std::string>> result;
 };

+
+
 Napi::Value whisper(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  if (info.Length() <= 0 || !info[0].IsObject()) {
@ -367,23 +331,6 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
  int32_t audio_ctx = whisper_params.Get("audio_ctx").As<Napi::Number>();
  bool comma_in_time = whisper_params.Get("comma_in_time").As<Napi::Boolean>();
  int32_t max_len = whisper_params.Get("max_len").As<Napi::Number>();
-  
-  // support prompt
-  std::string prompt = "";
-  if (whisper_params.Has("prompt") && whisper_params.Get("prompt").IsString()) {
-    prompt = whisper_params.Get("prompt").As<Napi::String>();
-  }
-  
-  // Add support for print_progress
-  bool print_progress = false;
-  if (whisper_params.Has("print_progress")) {
-    print_progress = whisper_params.Get("print_progress").As<Napi::Boolean>();
-  }
-  // Add support for progress_callback
-  Napi::Function progress_callback;
-  if (whisper_params.Has("progress_callback") && whisper_params.Get("progress_callback").IsFunction()) {
-    progress_callback = whisper_params.Get("progress_callback").As<Napi::Function>();
-  }

  Napi::Value pcmf32Value = whisper_params.Get("pcmf32");
  std::vector<float> pcmf32_vec;
@ -407,12 +354,9 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
  params.pcmf32 = pcmf32_vec;
  params.comma_in_time = comma_in_time;
  params.max_len = max_len;
-  params.print_progress = print_progress;
-  params.prompt = prompt;

  Napi::Function callback = info[1].As<Napi::Function>();
-  // Create a new Worker class with progress callback support
-  ProgressWorker* worker = new ProgressWorker(callback, params, progress_callback, env);
+  Worker* worker = new Worker(callback, params);
  worker->Queue();
  return env.Undefined();
 }
--- a/examples/addon.node/index.js
+++ b/examples/addon.node/index.js
@ -19,9 +19,6 @@ const whisperParams = {
  no_timestamps: false,
  audio_ctx: 0,
  max_len: 0,
-  progress_callback: (progress) => {
-      console.log(`progress: ${progress}%`);
-    }
 };

 const arguments = process.argv.slice(2);
--- a/examples/bench.wasm/README.md
+++ b/examples/bench.wasm/README.md
@ -2,7 +2,7 @@

 Benchmark the performance of whisper.cpp in the browser using WebAssembly

-Link: https://ggerganov.github.io/whisper.cpp/bench.wasm
+Link: https://whisper.ggerganov.com/bench/

 Terminal version: [examples/bench](/examples/bench)

@ -15,17 +15,7 @@ cd whisper.cpp
 mkdir build-em && cd build-em
 emcmake cmake ..
 make -j
-```
-The example can then be started by running a local HTTP server:
-```console
-python3 examples/server.py
-```
-And then opening a browser to the following URL:
-http://localhost:8000/bench.wasm

-To run the example in a different server, you need to copy the following files
-to the server's HTTP path:
-```
 # copy the produced page to your HTTP path
 cp bin/bench.wasm/*       /path/to/html/
 cp bin/libbench.worker.js /path/to/html/
--- a/examples/bench.wasm/index-tmpl.html
+++ b/examples/bench.wasm/index-tmpl.html
@ -24,8 +24,6 @@
                overflow-x: scroll;
            }
        </style>
-        <script src="../coi-serviceworker.js"></script>
-        <link rel="icon" href="data:,">
    </head>
    <body>
        <div id="main-container">
@ -38,10 +36,11 @@
            <br><br>

            <b>More examples:</b>
-                <a href="../">main</a> |
-                <a href="../bench.wasm/">bench</a> |
-                <a href="../stream.wasm">stream</a> |
-                <a href="../command.wasm/">command</a> |
+                <a href="https://whisper.ggerganov.com/">main</a> |
+                <a href="https://whisper.ggerganov.com/bench">bench</a> |
+                <a href="https://whisper.ggerganov.com/stream">stream</a> |
+                <a href="https://whisper.ggerganov.com/command">command</a> |
+                <a href="https://whisper.ggerganov.com/talk">talk</a> |

            <br><br>

--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -50,11 +50,11 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -t N,     --threads N   [%-7d] number of threads to use during computation\n", params.n_threads);
    fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n",                                  params.model.c_str());
    fprintf(stderr, "  -w N,     --what N      [%-7d] what to benchmark:\n",                          params.what);
+    fprintf(stderr, "  -ng,      --no-gpu      [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
+    fprintf(stderr, "  -fa,      --flash-attn  [%-7s] enable flash attention\n",                      params.flash_attn ? "true" : "false");
    fprintf(stderr, "                           %-7s  0 - whisper\n",                                 "");
    fprintf(stderr, "                           %-7s  1 - memcpy\n",                                  "");
    fprintf(stderr, "                           %-7s  2 - ggml_mul_mat\n",                            "");
-    fprintf(stderr, "  -ng,      --no-gpu      [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
-    fprintf(stderr, "  -fa,      --flash-attn  [%-7s] enable flash attention\n",                      params.flash_attn ? "true" : "false");
    fprintf(stderr, "\n");
 }

--- a/examples/cli/cli.cpp
+++ b/examples/cli/cli.cpp
@ -1,5 +1,4 @@
 #include "common.h"
-#include "common-whisper.h"

 #include "whisper.h"
 #include "grammar-parser.h"
@ -7,15 +6,14 @@
 #include <cmath>
 #include <fstream>
 #include <cstdio>
+#include <regex>
 #include <string>
 #include <thread>
 #include <vector>
 #include <cstring>

 #if defined(_WIN32)
-#ifndef NOMINMAX
 #define NOMINMAX
-#endif
 #include <windows.h>
 #endif

@ -201,8 +199,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params

 static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options] file0 file1 ...\n", argv[0]);
-    fprintf(stderr, "supported audio formats: flac, mp3, ogg, wav\n");
+    fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h,        --help              [default] show this help message and exit\n");
@ -247,7 +244,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
    fprintf(stderr, "  -dl,       --detect-language   [%-7s] exit after automatically detecting language\n",    params.detect_language ? "true" : "false");
    fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt (max n_text_ctx/2 tokens)\n",       params.prompt.c_str());
    fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
-    fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input audio file path\n",                            "");
+    fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
    fprintf(stderr, "  -dtw MODEL --dtw MODEL         [%-7s] compute token-level timestamps\n",                 params.dtw.c_str());
    fprintf(stderr, "  -ls,       --log-score         [%-7s] log best decoder scores of tokens\n",              params.log_score?"true":"false");
@ -1072,8 +1069,8 @@ int main(int argc, char ** argv) {
        std::vector<float> pcmf32;               // mono-channel F32 PCM
        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM

-        if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
-            fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
+        if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
+            fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
            continue;
        }

--- a/examples/coi-serviceworker.js
+++ b/examples/coi-serviceworker.js
@ -1,146 +0,0 @@
-/*! coi-serviceworker v0.1.7 - Guido Zuidhof and contributors, licensed under MIT */
-let coepCredentialless = false;
-if (typeof window === 'undefined') {
-    self.addEventListener("install", () => self.skipWaiting());
-    self.addEventListener("activate", (event) => event.waitUntil(self.clients.claim()));
-
-    self.addEventListener("message", (ev) => {
-        if (!ev.data) {
-            return;
-        } else if (ev.data.type === "deregister") {
-            self.registration
-                .unregister()
-                .then(() => {
-                    return self.clients.matchAll();
-                })
-                .then(clients => {
-                    clients.forEach((client) => client.navigate(client.url));
-                });
-        } else if (ev.data.type === "coepCredentialless") {
-            coepCredentialless = ev.data.value;
-        }
-    });
-
-    self.addEventListener("fetch", function (event) {
-        const r = event.request;
-        if (r.cache === "only-if-cached" && r.mode !== "same-origin") {
-            return;
-        }
-
-        const request = (coepCredentialless && r.mode === "no-cors")
-            ? new Request(r, {
-                credentials: "omit",
-            })
-            : r;
-        event.respondWith(
-            fetch(request)
-                .then((response) => {
-                    if (response.status === 0) {
-                        return response;
-                    }
-
-                    const newHeaders = new Headers(response.headers);
-                    newHeaders.set("Cross-Origin-Embedder-Policy",
-                        coepCredentialless ? "credentialless" : "require-corp"
-                    );
-                    if (!coepCredentialless) {
-                        newHeaders.set("Cross-Origin-Resource-Policy", "cross-origin");
-                    }
-                    newHeaders.set("Cross-Origin-Opener-Policy", "same-origin");
-
-                    return new Response(response.body, {
-                        status: response.status,
-                        statusText: response.statusText,
-                        headers: newHeaders,
-                    });
-                })
-                .catch((e) => console.error(e))
-        );
-    });
-
-} else {
-    (() => {
-        const reloadedBySelf = window.sessionStorage.getItem("coiReloadedBySelf");
-        window.sessionStorage.removeItem("coiReloadedBySelf");
-        const coepDegrading = (reloadedBySelf == "coepdegrade");
-
-        // You can customize the behavior of this script through a global `coi` variable.
-        const coi = {
-            shouldRegister: () => !reloadedBySelf,
-            shouldDeregister: () => false,
-            coepCredentialless: () => true,
-            coepDegrade: () => true,
-            doReload: () => window.location.reload(),
-            quiet: false,
-            ...window.coi
-        };
-
-        const n = navigator;
-        const controlling = n.serviceWorker && n.serviceWorker.controller;
-
-        // Record the failure if the page is served by serviceWorker.
-        if (controlling && !window.crossOriginIsolated) {
-            window.sessionStorage.setItem("coiCoepHasFailed", "true");
-        }
-        const coepHasFailed = window.sessionStorage.getItem("coiCoepHasFailed");
-
-        if (controlling) {
-            // Reload only on the first failure.
-            const reloadToDegrade = coi.coepDegrade() && !(
-                coepDegrading || window.crossOriginIsolated
-            );
-            n.serviceWorker.controller.postMessage({
-                type: "coepCredentialless",
-                value: (reloadToDegrade || coepHasFailed && coi.coepDegrade())
-                    ? false
-                    : coi.coepCredentialless(),
-            });
-            if (reloadToDegrade) {
-                !coi.quiet && console.log("Reloading page to degrade COEP.");
-                window.sessionStorage.setItem("coiReloadedBySelf", "coepdegrade");
-                coi.doReload("coepdegrade");
-            }
-
-            if (coi.shouldDeregister()) {
-                n.serviceWorker.controller.postMessage({ type: "deregister" });
-            }
-        }
-
-        // If we're already coi: do nothing. Perhaps it's due to this script doing its job, or COOP/COEP are
-        // already set from the origin server. Also if the browser has no notion of crossOriginIsolated, just give up here.
-        if (window.crossOriginIsolated !== false || !coi.shouldRegister()) return;
-
-        if (!window.isSecureContext) {
-            !coi.quiet && console.log("COOP/COEP Service Worker not registered, a secure context is required.");
-            return;
-        }
-
-        // In some environments (e.g. Firefox private mode) this won't be available
-        if (!n.serviceWorker) {
-            !coi.quiet && console.error("COOP/COEP Service Worker not registered, perhaps due to private mode.");
-            return;
-        }
-
-        n.serviceWorker.register(window.document.currentScript.src).then(
-            (registration) => {
-                !coi.quiet && console.log("COOP/COEP Service Worker registered", registration.scope);
-
-                registration.addEventListener("updatefound", () => {
-                    !coi.quiet && console.log("Reloading page to make use of updated COOP/COEP Service Worker.");
-                    window.sessionStorage.setItem("coiReloadedBySelf", "updatefound");
-                    coi.doReload();
-                });
-
-                // If the registration is active, but it's not controlling the page
-                if (registration.active && !n.serviceWorker.controller) {
-                    !coi.quiet && console.log("Reloading page to make use of COOP/COEP Service Worker.");
-                    window.sessionStorage.setItem("coiReloadedBySelf", "notcontrolling");
-                    coi.doReload();
-                }
-            },
-            (err) => {
-                !coi.quiet && console.error("COOP/COEP Service Worker failed to register:", err);
-            }
-        );
-    })();
-}
--- a/examples/command.wasm/README.md
+++ b/examples/command.wasm/README.md
@ -3,7 +3,7 @@
 This is a basic Voice Assistant example that accepts voice commands from the microphone.
 It runs in fully in the browser via WebAseembly.

-Online demo: https://ggerganov.github.io/whisper.cpp/command.wasm
+Online demo: https://whisper.ggerganov.com/command/

 Terminal version: [examples/command](/examples/command)

@ -15,18 +15,9 @@ git clone https://github.com/ggerganov/whisper.cpp
 cd whisper.cpp
 mkdir build-em && cd build-em
 emcmake cmake ..
-make -j libcommand
-```
-The example can then be started by running a local HTTP server:
-```console
-python3 examples/server.py
-```
-And then opening a browser to the following URL:
-http://localhost:8000/command.wasm/
+make -j

-To run the example in a different server, you need to copy the following files
-to the server's HTTP path:
-```
+# copy the produced page to your HTTP path
 cp bin/command.wasm/*       /path/to/html/
 cp bin/libcommand.worker.js /path/to/html/
 ```
--- a/examples/command.wasm/index-tmpl.html
+++ b/examples/command.wasm/index-tmpl.html
@ -24,8 +24,6 @@
                overflow-x: scroll;
            }
        </style>
-        <script src="../coi-serviceworker.js"></script>
-        <link rel="icon" href="data:,">
    </head>
    <body>
        <div id="main-container">
@ -38,10 +36,11 @@
            <br><br>

            <b>More examples:</b>
-                <a href="../">main</a> |
-                <a href="../bench.wasm/">bench</a> |
-                <a href="../stream.wasm">stream</a> |
-                <a href="../command.wasm/">command</a> |
+                <a href="https://whisper.ggerganov.com/">main</a> |
+                <a href="https://whisper.ggerganov.com/bench">bench</a> |
+                <a href="https://whisper.ggerganov.com/stream">stream</a> |
+                <a href="https://whisper.ggerganov.com/command">command</a> |
+                <a href="https://whisper.ggerganov.com/talk">talk</a> |

            <br><br>

--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@ -11,15 +11,22 @@
 #include "whisper.h"
 #include "grammar-parser.h"

-#include <algorithm>
-#include <chrono>
+#include <sstream>
+#include <cassert>
 #include <cstdio>
 #include <fstream>
-#include <map>
-#include <sstream>
+#include <mutex>
+#include <regex>
 #include <string>
 #include <thread>
 #include <vector>
+#include <map>
+#include <chrono>
+
+#if defined(_WIN32)
+#define NOMINMAX
+#include <windows.h>
+#endif

 // command-line parameters
 struct whisper_params {
@ -678,6 +685,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
 }

 int main(int argc, char ** argv) {
+#if defined(_WIN32)
+    SetConsoleOutputCP(CP_UTF8);
+#endif
+
    whisper_params params;

    if (whisper_params_parse(argc, argv, params) == false) {
--- a/examples/common-sdl.cpp
+++ b/examples/common-sdl.cpp
@ -159,11 +159,15 @@ void audio_async::callback(uint8_t * stream, int len) {

            memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
            memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
+
+            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
+            m_audio_len = m_audio.size();
        } else {
            memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
+
+            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
+            m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
        }
-        m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
-        m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
    }
 }

--- a/examples/common-whisper.cpp
+++ b/examples/common-whisper.cpp
@ -1,172 +0,0 @@
-#define _USE_MATH_DEFINES // for M_PI
-
-#include "common-whisper.h"
-
-#include "common.h"
-
-#include "whisper.h"
-
-// third-party utilities
-// use your favorite implementations
-#define STB_VORBIS_HEADER_ONLY
-#include "stb_vorbis.c"    /* Enables Vorbis decoding. */
-
-#ifdef _WIN32
-#ifndef NOMINMAX
-    #define NOMINMAX
-#endif
-#endif
-
-#define MA_NO_DEVICE_IO
-#define MA_NO_THREADING
-#define MA_NO_ENCODING
-#define MA_NO_GENERATION
-#define MA_NO_RESOURCE_MANAGER
-#define MA_NO_NODE_GRAPH
-#define MINIAUDIO_IMPLEMENTATION
-#include "miniaudio.h"
-
-#if defined(_MSC_VER)
-#pragma warning(disable: 4244 4267) // possible loss of data
-#endif
-
-#ifdef _WIN32
-#include <fcntl.h>
-#include <io.h>
-#endif
-
-#include <cstring>
-#include <fstream>
-
-#ifdef WHISPER_FFMPEG
-// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
-extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
-#endif
-
-bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
-    std::vector<uint8_t> audio_data; // used for pipe input from stdin or ffmpeg decoding output
-
-    ma_result result;
-    ma_decoder_config decoder_config;
-    ma_decoder decoder;
-
-    decoder_config = ma_decoder_config_init(ma_format_f32, stereo ? 2 : 1, WHISPER_SAMPLE_RATE);
-
-    if (fname == "-") {
-		#ifdef _WIN32
-		_setmode(_fileno(stdin), _O_BINARY);
-		#endif
-
-		uint8_t buf[1024];
-		while (true)
-		{
-			const size_t n = fread(buf, 1, sizeof(buf), stdin);
-			if (n == 0) {
-				break;
-			}
-			audio_data.insert(audio_data.end(), buf, buf + n);
-		}
-
-		if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
-
-			fprintf(stderr, "Error: failed to open audio data from stdin (%s)\n", ma_result_description(result));
-
-			return false;
-		}
-
-		fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, audio_data.size());
-    }
-    else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
-#if defined(WHISPER_FFMPEG)
-		if (ffmpeg_decode_audio(fname, audio_data) != 0) {
-			fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());
-
-			return false;
-		}
-
-		if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
-			fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
-
-			return false;
-		}
-#else
-		if ((result = ma_decoder_init_memory(fname.c_str(), fname.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
-			fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
-
-			return false;
-		}
-#endif
-    }
-
-    ma_uint64 frame_count;
-    ma_uint64 frames_read;
-
-    if ((result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count)) != MA_SUCCESS) {
-		fprintf(stderr, "error: failed to retrieve the length of the audio data (%s)\n", ma_result_description(result));
-
-		return false;
-    }
-
-    pcmf32.resize(stereo ? frame_count*2 : frame_count);
-
-    if ((result = ma_decoder_read_pcm_frames(&decoder, pcmf32.data(), frame_count, &frames_read)) != MA_SUCCESS) {
-		fprintf(stderr, "error: failed to read the frames of the audio data (%s)\n", ma_result_description(result));
-
-		return false;
-    }
-
-    if (stereo) {
-		pcmf32s.resize(2);
-		pcmf32s[0].resize(frame_count);
-		pcmf32s[1].resize(frame_count);
-		for (uint64_t i = 0; i < frame_count; i++) {
-			pcmf32s[0][i] = pcmf32[2*i];
-			pcmf32s[1][i] = pcmf32[2*i + 1];
-		}
-    }
-
-    ma_decoder_uninit(&decoder);
-
-    return true;
-}
-
-//  500 -> 00:05.000
-// 6000 -> 01:00.000
-std::string to_timestamp(int64_t t, bool comma) {
-    int64_t msec = t * 10;
-    int64_t hr = msec / (1000 * 60 * 60);
-    msec = msec - hr * (1000 * 60 * 60);
-    int64_t min = msec / (1000 * 60);
-    msec = msec - min * (1000 * 60);
-    int64_t sec = msec / 1000;
-    msec = msec - sec * 1000;
-
-    char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
-
-    return std::string(buf);
-}
-
-int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
-    return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
-}
-
-bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id) {
-    std::ofstream speak_file(path.c_str());
-    if (speak_file.fail()) {
-        fprintf(stderr, "%s: failed to open speak_file\n", __func__);
-        return false;
-    } else {
-        speak_file.write(text.c_str(), text.size());
-        speak_file.close();
-        int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
-        if (ret != 0) {
-            fprintf(stderr, "%s: failed to speak\n", __func__);
-            return false;
-        }
-    }
-    return true;
-}
-
-#undef STB_VORBIS_HEADER_ONLY
-#include "stb_vorbis.c"
--- a/examples/common-whisper.h
+++ b/examples/common-whisper.h
@ -1,24 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-#include <cstdint>
-
-// Read WAV audio file and store the PCM data into pcmf32
-// fname can be a buffer of WAV data instead of a filename
-// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
-// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
-bool read_audio_data(
-        const std::string & fname,
-        std::vector<float> & pcmf32,
-        std::vector<std::vector<float>> & pcmf32s,
-        bool stereo);
-
-// convert timestamp to string, 6000 -> 01:00.000
-std::string to_timestamp(int64_t t, bool comma = false);
-
-// given a timestamp get the sample
-int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
-
-// write text to file, and call system("command voice_id file")
-bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);
--- a/examples/common.cpp
+++ b/examples/common.cpp
@ -2,18 +2,33 @@

 #include "common.h"

+// third-party utilities
+// use your favorite implementations
+#define DR_WAV_IMPLEMENTATION
+#include "dr_wav.h"
+
 #include <cmath>
-#include <codecvt>
 #include <cstring>
 #include <fstream>
-#include <locale>
 #include <regex>
+#include <locale>
+#include <codecvt>
 #include <sstream>

 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif

+#ifdef _WIN32
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+#ifdef WHISPER_FFMPEG
+// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
+extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
+#endif
+
 // Function to check if the next argument exists
 static std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
    if (i + 1 < argc && argv[i + 1][0] != '-') {
@ -247,6 +262,17 @@ std::map<std::string, int32_t> json_parse(const std::string & fname) {
    return result;
 }

+std::string convert_to_utf8(const std::wstring & input) {
+    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
+    return converter.to_bytes(input);
+}
+
+
+std::wstring convert_to_wstring(const std::string & input) {
+    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
+    return converter.from_bytes(input);
+}
+
 void gpt_split_words(std::string str, std::vector<std::string>& words) {
    const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
    const std::regex re(pattern);
@ -598,6 +624,129 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(

 }

+bool is_wav_buffer(const std::string buf) {
+    // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
+    // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
+    if (buf.size() < 12 || buf.substr(0, 4) != "RIFF" || buf.substr(8, 4) != "WAVE") {
+        return false;
+    }
+
+    uint32_t chunk_size = *reinterpret_cast<const uint32_t*>(buf.data() + 4);
+    if (chunk_size + 8 != buf.size()) {
+        return false;
+    }
+
+    return true;
+}
+
+bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
+    drwav wav;
+    std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
+
+    if (fname == "-") {
+        {
+            #ifdef _WIN32
+            _setmode(_fileno(stdin), _O_BINARY);
+            #endif
+
+            uint8_t buf[1024];
+            while (true)
+            {
+                const size_t n = fread(buf, 1, sizeof(buf), stdin);
+                if (n == 0) {
+                    break;
+                }
+                wav_data.insert(wav_data.end(), buf, buf + n);
+            }
+        }
+
+        if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
+            fprintf(stderr, "error: failed to open WAV file from stdin\n");
+            return false;
+        }
+
+        fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
+    }
+    else if (is_wav_buffer(fname)) {
+        if (drwav_init_memory(&wav, fname.c_str(), fname.size(), nullptr) == false) {
+            fprintf(stderr, "error: failed to open WAV file from fname buffer\n");
+            return false;
+        }
+    }
+    else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
+#if defined(WHISPER_FFMPEG)
+        if (ffmpeg_decode_audio(fname, wav_data) != 0) {
+            fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
+            return false;
+        }
+        if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
+            fprintf(stderr, "error: failed to read wav data as wav \n");
+            return false;
+        }
+#else
+        fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
+        return false;
+#endif
+    }
+
+    if (wav.channels != 1 && wav.channels != 2) {
+        fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
+        drwav_uninit(&wav);
+        return false;
+    }
+
+    if (stereo && wav.channels != 2) {
+        fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
+        drwav_uninit(&wav);
+        return false;
+    }
+
+    if (wav.sampleRate != COMMON_SAMPLE_RATE) {
+        fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
+        drwav_uninit(&wav);
+        return false;
+    }
+
+    if (wav.bitsPerSample != 16) {
+        fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
+        drwav_uninit(&wav);
+        return false;
+    }
+
+    const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
+
+    std::vector<int16_t> pcm16;
+    pcm16.resize(n*wav.channels);
+    drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
+    drwav_uninit(&wav);
+
+    // convert to mono, float
+    pcmf32.resize(n);
+    if (wav.channels == 1) {
+        for (uint64_t i = 0; i < n; i++) {
+            pcmf32[i] = float(pcm16[i])/32768.0f;
+        }
+    } else {
+        for (uint64_t i = 0; i < n; i++) {
+            pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
+        }
+    }
+
+    if (stereo) {
+        // convert to stereo, float
+        pcmf32s.resize(2);
+
+        pcmf32s[0].resize(n);
+        pcmf32s[1].resize(n);
+        for (uint64_t i = 0; i < n; i++) {
+            pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
+            pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
+        }
+    }
+
+    return true;
+}
+
 void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
    const float rc = 1.0f / (2.0f * M_PI * cutoff);
    const float dt = 1.0f / sample_rate;
@ -673,7 +822,90 @@ float similarity(const std::string & s0, const std::string & s1) {
    return 1.0f - (dist / std::max(s0.size(), s1.size()));
 }

-bool is_file_exist(const char * filename) {
-    std::ifstream infile(filename);
+bool sam_params_parse(int argc, char ** argv, sam_params & params) {
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+
+        if (arg == "-s" || arg == "--seed") {
+            params.seed = std::stoi(argv[++i]);
+        } else if (arg == "-t" || arg == "--threads") {
+            params.n_threads = std::stoi(argv[++i]);
+        } else if (arg == "-m" || arg == "--model") {
+            params.model = argv[++i];
+        } else if (arg == "-i" || arg == "--inp") {
+            params.fname_inp = argv[++i];
+        } else if (arg == "-o" || arg == "--out") {
+            params.fname_out = argv[++i];
+        } else if (arg == "-h" || arg == "--help") {
+            sam_print_usage(argc, argv, params);
+            exit(0);
+        } else {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            sam_print_usage(argc, argv, params);
+            exit(0);
+        }
+    }
+
+    return true;
+}
+
+void sam_print_usage(int /*argc*/, char ** argv, const sam_params & params) {
+    fprintf(stderr, "usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "options:\n");
+    fprintf(stderr, "  -h, --help            show this help message and exit\n");
+    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1)\n");
+    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
+    fprintf(stderr, "  -m FNAME, --model FNAME\n");
+    fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "  -i FNAME, --inp FNAME\n");
+    fprintf(stderr, "                        input file (default: %s)\n", params.fname_inp.c_str());
+    fprintf(stderr, "  -o FNAME, --out FNAME\n");
+    fprintf(stderr, "                        output file (default: %s)\n", params.fname_out.c_str());
+    fprintf(stderr, "\n");
+}
+
+//  500 -> 00:00:05.000
+// 6000 -> 00:01:00.000
+std::string to_timestamp(int64_t t, bool comma) {
+    int64_t msec = t * 10;
+    int64_t hr = msec / (1000 * 60 * 60);
+    msec = msec - hr * (1000 * 60 * 60);
+    int64_t min = msec / (1000 * 60);
+    msec = msec - min * (1000 * 60);
+    int64_t sec = msec / 1000;
+    msec = msec - sec * 1000;
+
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
+
+    return std::string(buf);
+}
+
+int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
+    return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
+}
+
+bool is_file_exist(const char *fileName)
+{
+    std::ifstream infile(fileName);
    return infile.good();
 }
+
+bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id)
+{
+    std::ofstream speak_file(path.c_str());
+    if (speak_file.fail()) {
+        fprintf(stderr, "%s: failed to open speak_file\n", __func__);
+        return false;
+    } else {
+        speak_file.write(text.c_str(), text.size());
+        speak_file.close();
+        int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
+        if (ret != 0) {
+            fprintf(stderr, "%s: failed to speak\n", __func__);
+            return false;
+        }
+    }
+    return true;
+}
--- a/examples/common.h
+++ b/examples/common.h
@ -11,6 +11,8 @@
 #include <fstream>
 #include <sstream>

+#define COMMON_SAMPLE_RATE 16000
+
 //
 // GPT CLI argument parsing
 //
@ -134,6 +136,19 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
 // Audio utils
 //

+// Check if a buffer is a WAV audio file
+bool is_wav_buffer(const std::string buf);
+
+// Read WAV audio file and store the PCM data into pcmf32
+// fname can be a buffer of WAV data instead of a filename
+// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
+// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
+bool read_wav(
+        const std::string & fname,
+        std::vector<float> & pcmf32,
+        std::vector<std::vector<float>> & pcmf32s,
+        bool stereo);
+
 // Write PCM data into WAV audio file
 class wav_writer {
 private:
@ -251,6 +266,23 @@ bool vad_simple(
 // compute similarity between two strings using Levenshtein distance
 float similarity(const std::string & s0, const std::string & s1);

+//
+// SAM argument parsing
+//
+
+struct sam_params {
+    int32_t seed      = -1; // RNG seed
+    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+
+    std::string model     = "models/sam-vit-b/ggml-model-f16.bin"; // model path
+    std::string fname_inp = "img.jpg";
+    std::string fname_out = "img.out";
+};
+
+bool sam_params_parse(int argc, char ** argv, sam_params & params);
+
+void sam_print_usage(int argc, char ** argv, const sam_params & params);
+
 //
 // Terminal utils
 //
@ -298,5 +330,14 @@ const std::vector<std::string> k_colors = {
 // Other utils
 //

+// convert timestamp (in units of 10 ms) to string, 6000 -> 00:01:00.000
+std::string to_timestamp(int64_t t, bool comma = false);
+
+// given a timestamp get the sample
+int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
+
 // check if file exists using ifstream
-bool is_file_exist(const char * filename);
+bool is_file_exist(const char *fileName);
+
+// write text to file, and call system("command voice_id file")
+bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);
--- a/examples/deprecation-warning/CMakeLists.txt
+++ b/examples/deprecation-warning/CMakeLists.txt
@ -1,6 +1,4 @@
 add_executable(main ./deprecation-warning.cpp)
 add_executable(bench ./deprecation-warning.cpp)
-if (WHISPER_SDL2)
-    add_executable(stream ./deprecation-warning.cpp)
-    add_executable(command ./deprecation-warning.cpp)
-endif()
+add_executable(stream ./deprecation-warning.cpp)
+add_executable(command ./deprecation-warning.cpp)
--- a/examples/dr_wav.h
+++ b/examples/dr_wav.h
--- a/examples/generate-karaoke.sh
+++ b/examples/generate-karaoke.sh
@ -41,17 +41,20 @@ fi
 # record some raw audio
 sox -d rec.wav

+# resample to 16kHz
+ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>&1
+
 # run Whisper
 echo "Processing ..."
-${executable} -m models/ggml-base.en.bin rec.wav -owts > /dev/null 2>&1
+${executable} -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1

 # generate Karaoke video
 echo "Generating video ..."
-source rec.wav.wts > /dev/null 2>&1
+source rec16.wav.wts > /dev/null 2>&1

 # play the video
 echo "Playing ./rec16.wav.mp4 ..."
-ffplay -loglevel 0 -autoexit ./rec.wav.mp4
+ffplay -loglevel 0 -autoexit ./rec16.wav.mp4

 echo "Done"
 exit 0
--- a/examples/lsp/lsp.cpp
+++ b/examples/lsp/lsp.cpp
@ -3,15 +3,15 @@
 #include "whisper.h"
 #include "json.hpp"

-#include <cassert>
-#include <chrono>
-#include <cstdio>
-#include <deque>
 #include <iostream>
-#include <set>
+#include <cassert>
+#include <cstdio>
 #include <string>
 #include <thread>
 #include <vector>
+#include <deque>
+#include <set>
+#include <chrono>

 using json = nlohmann::json;

--- a/examples/miniaudio.h
+++ b/examples/miniaudio.h
--- a/examples/server.py
+++ b/examples/server.py
@ -1,39 +0,0 @@
-import http.server
-import socketserver
-import os
-from pathlib import Path
-
-SCRIPT_DIR = Path(__file__).parent.absolute()
-DIRECTORY = os.path.join(SCRIPT_DIR, "../build-em/bin")
-DIRECTORY = os.path.abspath(DIRECTORY)
-
-class CustomHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, directory=DIRECTORY, **kwargs)
-
-    def do_GET(self):
-        # If requesting a worker file from any subdirectory
-        if '.worker.js' in self.path:
-            worker_file = os.path.basename(self.path)
-            worker_path = os.path.join(DIRECTORY, worker_file)
-
-            if os.path.exists(worker_path):
-                self.path = '/' + worker_file
-
-        return super().do_GET()
-
-    def end_headers(self):
-        # Add required headers for SharedArrayBuffer
-        self.send_header("Cross-Origin-Opener-Policy", "same-origin")
-        self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
-        self.send_header("Access-Control-Allow-Origin", "*");
-        super().end_headers()
-
-PORT = 8000
-
-with socketserver.TCPServer(("", PORT), CustomHTTPRequestHandler) as httpd:
-    print(f"Serving directory '{DIRECTORY}' at http://localhost:{PORT}")
-    try:
-        httpd.serve_forever()
-    except KeyboardInterrupt:
-        print("\nServer stopped.")
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -1,18 +1,18 @@
 #include "common.h"
-#include "common-whisper.h"

 #include "whisper.h"
 #include "httplib.h"
 #include "json.hpp"

-#include <chrono>
 #include <cmath>
-#include <cstdio>
 #include <fstream>
-#include <sstream>
+#include <cstdio>
 #include <string>
 #include <thread>
 #include <vector>
+#include <cstring>
+#include <sstream>
+#include <chrono>

 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@ -723,8 +723,8 @@ int main(int argc, char ** argv) {
                return;
            }

-            // read audio content into pcmf32
-            if (!::read_audio_data(temp_filename, pcmf32, pcmf32s, params.diarize))
+            // read wav content into pcmf32
+            if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize))
            {
                fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
                const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
@ -735,10 +735,10 @@ int main(int argc, char ** argv) {
            // remove temp file
            std::remove(temp_filename.c_str());
        } else {
-            if (!::read_audio_data(audio_file.content, pcmf32, pcmf32s, params.diarize))
+            if (!::read_wav(audio_file.content, pcmf32, pcmf32s, params.diarize))
            {
-                fprintf(stderr, "error: failed to read audio data\n");
-                const std::string error_resp = "{\"error\":\"failed to read audio data\"}";
+                fprintf(stderr, "error: failed to read WAV file\n");
+                const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
                res.set_content(error_resp, "application/json");
                return;
            }
@ -1024,11 +1024,6 @@ int main(int argc, char ** argv) {
        // check if the model is in the file system
    });

-    svr.Get(sparams.request_path + "/health", [&](const Request &, Response &res){
-        const std::string health_response = "{\"status\":\"ok\"}";
-        res.set_content(health_response, "application/json");
-    });
-
    svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
        const char fmt[] = "500 Internal Server Error\n%s";
        char buf[BUFSIZ];
--- a/examples/stb_vorbis.c
+++ b/examples/stb_vorbis.c
--- a/examples/stream.wasm/README.md
+++ b/examples/stream.wasm/README.md
@ -13,17 +13,7 @@ cd whisper.cpp
 mkdir build-em && cd build-em
 emcmake cmake ..
 make -j
-```
-The example can then be started by running a local HTTP server:
-```console
-python3 examples/server.py
-```
-And then opening a browser to the following URL:
-http://localhost:8000/stream.wasm

-To run the example in a different server, you need to copy the following files
-to the server's HTTP path:
-```
 # copy the produced page to your HTTP path
 cp bin/stream.wasm/*       /path/to/html/
 cp bin/libstream.worker.js /path/to/html/
--- a/examples/stream.wasm/index-tmpl.html
+++ b/examples/stream.wasm/index-tmpl.html
@ -24,8 +24,6 @@
                overflow-x: scroll;
            }
        </style>
-        <script src="../coi-serviceworker.js"></script>
-        <link rel="icon" href="data:,">
    </head>
    <body>
        <div id="main-container">
@ -38,10 +36,11 @@
            <br><br>

            <b>More examples:</b>
-                <a href="../">main</a> |
-                <a href="../bench.wasm/">bench</a> |
-                <a href="../stream.wasm">stream</a> |
-                <a href="../command.wasm/">command</a> |
+                <a href="https://whisper.ggerganov.com/">main</a> |
+                <a href="https://whisper.ggerganov.com/bench">bench</a> |
+                <a href="https://whisper.ggerganov.com/stream">stream</a> |
+                <a href="https://whisper.ggerganov.com/command">command</a> |
+                <a href="https://whisper.ggerganov.com/talk">talk</a> |

            <br><br>

--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -4,15 +4,20 @@
 //
 #include "common-sdl.h"
 #include "common.h"
-#include "common-whisper.h"
 #include "whisper.h"

-#include <chrono>
+#include <cassert>
 #include <cstdio>
-#include <fstream>
 #include <string>
 #include <thread>
 #include <vector>
+#include <fstream>
+#include <chrono>
+
+#if defined(_WIN32)
+#define NOMINMAX
+#include <windows.h>
+#endif

 // command-line parameters
 struct whisper_params {
@ -23,7 +28,6 @@ struct whisper_params {
    int32_t capture_id = -1;
    int32_t max_tokens = 32;
    int32_t audio_ctx  = 0;
-    int32_t beam_size  = -1;

    float vad_thold    = 0.6f;
    float freq_thold   = 100.0f;
@ -60,7 +64,6 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
        else if (arg == "-c"    || arg == "--capture")       { params.capture_id    = std::stoi(argv[++i]); }
        else if (arg == "-mt"   || arg == "--max-tokens")    { params.max_tokens    = std::stoi(argv[++i]); }
        else if (arg == "-ac"   || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(argv[++i]); }
-        else if (arg == "-bs"   || arg == "--beam-size")     { params.beam_size     = std::stoi(argv[++i]); }
        else if (arg == "-vth"  || arg == "--vad-thold")     { params.vad_thold     = std::stof(argv[++i]); }
        else if (arg == "-fth"  || arg == "--freq-thold")    { params.freq_thold    = std::stof(argv[++i]); }
        else if (arg == "-tr"   || arg == "--translate")     { params.translate     = true; }
@ -98,7 +101,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -c ID,    --capture ID    [%-7d] capture device ID\n",                              params.capture_id);
    fprintf(stderr, "  -mt N,    --max-tokens N  [%-7d] maximum number of tokens per audio chunk\n",       params.max_tokens);
    fprintf(stderr, "  -ac N,    --audio-ctx N   [%-7d] audio context size (0 - all)\n",                   params.audio_ctx);
-    fprintf(stderr, "  -bs N,    --beam-size N   [%-7d] beam size for beam search\n",                      params.beam_size);
    fprintf(stderr, "  -vth N,   --vad-thold N   [%-7.2f] voice activity detection threshold\n",           params.vad_thold);
    fprintf(stderr, "  -fth N,   --freq-thold N  [%-7.2f] high-pass frequency cutoff\n",                   params.freq_thold);
    fprintf(stderr, "  -tr,      --translate     [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
@ -116,6 +118,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
 }

 int main(int argc, char ** argv) {
+#if defined(_WIN32)
+    SetConsoleOutputCP(CP_UTF8);
+#endif
+
    whisper_params params;

    if (whisper_params_parse(argc, argv, params) == false) {
@ -160,6 +166,7 @@ int main(int argc, char ** argv) {
    cparams.use_gpu    = params.use_gpu;
    cparams.flash_attn = params.flash_attn;

+    fprintf(stderr, "whisper_init_from_file_with_params ...\n");
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);

    std::vector<float> pcmf32    (n_samples_30s, 0.0f);
@ -169,6 +176,8 @@ int main(int argc, char ** argv) {
    std::vector<whisper_token> prompt_tokens;

    // print some info about the processing
+    fprintf(stderr, "whisper_init_from_file_with_params ok\n");
+
    {
        fprintf(stderr, "\n");
        if (!whisper_is_multilingual(ctx)) {
@ -244,11 +253,6 @@ int main(int argc, char ** argv) {

        if (!use_vad) {
            while (true) {
-                // handle Ctrl + C
-                is_running = sdl_poll_events();
-                if (!is_running) {
-                    break;
-                }
                audio.get(params.step_ms, pcmf32_new);

                if ((int) pcmf32_new.size() > 2*n_samples_step) {
@ -306,7 +310,7 @@ int main(int argc, char ** argv) {

        // run the inference
        {
-            whisper_full_params wparams = whisper_full_default_params(params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY);
+            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

            wparams.print_progress   = false;
            wparams.print_special    = params.print_special;
@ -317,7 +321,6 @@ int main(int argc, char ** argv) {
            wparams.max_tokens       = params.max_tokens;
            wparams.language         = params.language.c_str();
            wparams.n_threads        = params.n_threads;
-            wparams.beam_search.beam_size = params.beam_size;

            wparams.audio_ctx        = params.audio_ctx;

--- a/examples/talk-llama/CMakeLists.txt
+++ b/examples/talk-llama/CMakeLists.txt
@ -25,7 +25,10 @@ if (WHISPER_SDL2)
        unicode-data.cpp)
    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})

-    target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+    if (WHISPER_CLBLAST)
+        set(CLBLAST_LIBNAME clblast)
+    endif ()
+    target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CLBLAST_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})

    if(WIN32)
        # It requires Windows 8.1 or later for PrefetchVirtualMemory
--- a/examples/talk-llama/talk-llama.cpp
+++ b/examples/talk-llama/talk-llama.cpp
@ -3,19 +3,24 @@

 #include "common-sdl.h"
 #include "common.h"
-#include "common-whisper.h"
 #include "whisper.h"
 #include "llama.h"

-#include <chrono>
+#include <cassert>
 #include <cstdio>
 #include <fstream>
 #include <regex>
-#include <regex>
-#include <sstream>
 #include <string>
 #include <thread>
 #include <vector>
+#include <regex>
+#include <sstream>
+#include <chrono>
+
+#if defined(_WIN32)
+#define NOMINMAX
+#include <windows.h>
+#endif

 static std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
    const llama_model * model = llama_get_model(ctx);
@ -273,6 +278,10 @@ The transcript only includes text, it does not include markup like HTML and Mark
 {0}{4})";

 int main(int argc, char ** argv) {
+#if defined(_WIN32)
+    SetConsoleOutputCP(CP_UTF8);
+#endif
+
    whisper_params params;

    if (whisper_params_parse(argc, argv, params) == false) {
--- a/examples/wchess/libwchess/WChess.cpp
+++ b/examples/wchess/libwchess/WChess.cpp
@ -2,6 +2,7 @@
 #include "Chessboard.h"
 #include "grammar-parser.h"
 #include "common.h"
+#include <thread>
 #include <chrono>

 WChess::WChess(whisper_context * ctx,
--- a/examples/whisper.android.java/app/src/main/jni/whisper/CMakeLists.txt
+++ b/examples/whisper.android.java/app/src/main/jni/whisper/CMakeLists.txt
@ -2,23 +2,15 @@ cmake_minimum_required(VERSION 3.10)

 project(whisper.cpp)

-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 11)
 set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)

 set(SOURCE_FILES
    ${WHISPER_LIB_DIR}/ggml/src/ggml.c
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.c
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.cpp
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/unary-ops.cpp
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/binary-ops.cpp
+    ${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
    ${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
    ${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-backend-reg.cpp
    ${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
-    ${WHISPER_LIB_DIR}/ggml/src/ggml-threading.cpp
    ${WHISPER_LIB_DIR}/src/whisper.cpp
    ${CMAKE_SOURCE_DIR}/jni.c
    )
@ -33,7 +25,6 @@ function(build_library target_name)
    )

    target_link_libraries(${target_name} ${LOG_LIB} android)
-    target_compile_definitions(${target_name} PUBLIC GGML_USE_CPU)

    if (${target_name} STREQUAL "whisper_v8fp16_va")
        target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
@ -66,4 +57,3 @@ include_directories(${WHISPER_LIB_DIR}/src)
 include_directories(${WHISPER_LIB_DIR}/include)
 include_directories(${WHISPER_LIB_DIR}/ggml/include)
 include_directories(${WHISPER_LIB_DIR}/ggml/src)
-include_directories(${WHISPER_LIB_DIR}/ggml/src/ggml-cpu)
--- a/examples/whisper.android.java/build.gradle
+++ b/examples/whisper.android.java/build.gradle
@ -16,10 +16,9 @@ allprojects {
  repositories {
    google()
    jcenter()
-    maven { url "https://maven.aliyun.com/repository/gradle-plugin" }
  }
 }

 task clean(type: Delete) {
  delete rootProject.buildDir
-}
+}
--- a/examples/whisper.android.java/gradlew
+++ b/examples/whisper.android.java/gradlew
--- a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
+++ b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
@ -32,8 +32,6 @@ if (NOT GGML_HOME)
        ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
        ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
        ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
-        ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/unary-ops.cpp
-        ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/binary-ops.cpp
        )
 endif()

@ -46,8 +44,6 @@ function(build_library target_name)
        ${SOURCE_FILES}
    )

-    target_compile_definitions(${target_name} PUBLIC GGML_USE_CPU)
-
    if (${target_name} STREQUAL "whisper_v8fp16_va")
        target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
        set(GGML_COMPILE_OPTIONS                      -march=armv8.2-a+fp16)
--- a/examples/whisper.objc/README.md
+++ b/examples/whisper.objc/README.md
@ -11,23 +11,39 @@ https://user-images.githubusercontent.com/1991296/204126266-ce4177c6-6eca-4bd9-b

 ## Usage

-This example uses the whisper.xcframework which needs to be built first using the following command:
 ```bash
-./build_xcframework.sh
-```
+git clone https://github.com/ggerganov/whisper.cpp
+open whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/

-A model is also required to be downloaded and can be done using the following command:
-```bash
-./models/download-ggml-model.sh base.en
-```
-
-If you don't want to convert a Core ML model, you can skip this step by creating dummy model:
-```bash
+# if you don't want to convert a Core ML model, you can skip this step by creating a dummy model
 mkdir models/ggml-base.en-encoder.mlmodelc
 ```

+Make sure to build the project in `Release`:
+
+<img width="947" alt="image" src="https://user-images.githubusercontent.com/1991296/197382607-9e1e6d1b-79fa-496f-9d16-b71dc1535701.png">
+
+Also, don't forget to add the `-DGGML_USE_ACCELERATE` compiler flag for `ggml.c` in Build Phases.
+This can significantly improve the performance of the transcription:
+
+<img width="1072" alt="image" src="https://user-images.githubusercontent.com/1991296/208511239-8d7cdbd1-aa48-41b5-becd-ca288d53cc07.png">
+
 ## Core ML

-Follow the [`Core ML support` section of readme](../../README.md#core-ml-support) to convert the model.
-That is all the needs to be done to use the Core ML model in the app. The converted model is a
-resource in the project and will be used if it is available.
+If you want to enable Core ML support, you can add the `-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK` compiler flag for `whisper.cpp` in Build Phases:
+
+<img width="1072" alt="image" src="https://github.com/ggerganov/whisper.cpp/assets/3001525/103e8f57-6eb6-490d-a60c-f6cf6c319324">
+
+Then follow the [`Core ML support` section of readme](../../README.md#core-ml-support) to convert the model.
+
+This project also adds `-O3 -DNDEBUG` to `Other C Flags`, but adding flags at the app-project level is not ideal in real-world use (they apply to all C/C++ files); consider splitting the xcodeproj into a workspace in your own project.
+
+## Metal
+
+You can also enable Metal to make the inference run on the GPU of your device. This might or might not be more efficient
+compared to Core ML depending on the model and device that you use.
+
+To enable Metal, just add `-DGGML_USE_METAL` instead of the `-DWHISPER_USE_COREML` flag and you are ready.
+This will make both the Encoder and the Decoder run on the GPU.
+
+If you want to run the Encoder with Core ML and the Decoder with Metal then simply add both `-DWHISPER_USE_COREML -DGGML_USE_METAL` flags. That's all!
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
@ -7,6 +7,8 @@
 	objects = {

 /* Begin PBXBuildFile section */
+		1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
+		1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
 		18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
 		18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
 		18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
@ -14,12 +16,25 @@
 		18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
 		18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
 		18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
+		18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
+		18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
 		18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
+		18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
+		18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
+		18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
+		18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
+		18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
+		18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */; };
+		18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */; };
+		18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.cpp */; settings = {COMPILER_FLAGS = "-x c++"; }; };
+		18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */; };
+		18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */; };
+		433188B82D3A187C00E3FE79 /* gguf.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 433188B72D3A187C00E3FE79 /* gguf.cpp */; };
+		437B63E22D36280C002A49EC /* ggml-cpu-traits.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 437B63E12D36280C002A49EC /* ggml-cpu-traits.cpp */; };
 		7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
 		7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
+		7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
 		7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */ = {isa = PBXBuildFile; fileRef = 7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */; };
-		DDE3609F2D87EA8C004EA223 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DDE3609E2D87EA8C004EA223 /* whisper.xcframework */; };
-		DDE360A02D87EA8C004EA223 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = DDE3609E2D87EA8C004EA223 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
 /* End PBXBuildFile section */

 /* Begin PBXCopyFilesBuildPhase section */
@ -29,24 +44,18 @@
 			dstPath = "";
 			dstSubfolderSpec = 7;
 			files = (
+				18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */,
 			);
 			name = "Copy Files";
 			runOnlyForDeploymentPostprocessing = 0;
 		};
-		DDE360A12D87EA8C004EA223 /* Embed Frameworks */ = {
-			isa = PBXCopyFilesBuildPhase;
-			buildActionMask = 2147483647;
-			dstPath = "";
-			dstSubfolderSpec = 10;
-			files = (
-				DDE360A02D87EA8C004EA223 /* whisper.xcframework in Embed Frameworks */,
-			);
-			name = "Embed Frameworks";
-			runOnlyForDeploymentPostprocessing = 0;
-		};
 /* End PBXCopyFilesBuildPhase section */

 /* Begin PBXFileReference section */
+		184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
+		184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
+		1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
+		1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
 		18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
 		18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
 		18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
@ -59,7 +68,34 @@
 		18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
 		18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
 		18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
+		18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../include/whisper.h; sourceTree = "<group>"; };
+		18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../src/whisper.cpp; sourceTree = "<group>"; };
+		18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml/src/ggml.c; sourceTree = "<group>"; };
+		18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml/include/ggml.h; sourceTree = "<group>"; };
 		18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
+		18A275FE2C2A94DE001C8D37 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../../ggml/include/ggml-metal.h"; sourceTree = "<group>"; };
+		18A275FF2C2A9563001C8D37 /* ggml-common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-common.h"; path = "../../../ggml/src/ggml-common.h"; sourceTree = "<group>"; };
+		18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml/src/ggml-quants.h"; sourceTree = "<group>"; };
+		18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml/include/ggml-backend.h"; sourceTree = "<group>"; };
+		18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml/src/ggml-backend-impl.h"; sourceTree = "<group>"; };
+		18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
+		18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
+		18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
+		18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.c"; sourceTree = "<group>"; };
+		18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
+		18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-threading.h"; path = "../../../ggml/src/ggml-threading.h"; sourceTree = "<group>"; };
+		18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-threading.cpp"; path = "../../../ggml/src/ggml-threading.cpp"; sourceTree = "<group>"; };
+		18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.cpp"; sourceTree = "<group>"; };
+		18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-aarch64.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.h"; sourceTree = "<group>"; };
+		18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-aarch64.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp"; sourceTree = "<group>"; };
+		18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-impl.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-impl.h"; sourceTree = "<group>"; };
+		18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-quants.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.h"; sourceTree = "<group>"; };
+		18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-quants.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.c"; sourceTree = "<group>"; };
+		18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-backend-reg.cpp"; path = "../../../ggml/src/ggml-backend-reg.cpp"; sourceTree = "<group>"; };
+		433188B72D3A187C00E3FE79 /* gguf.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = gguf.cpp; path = ../../../ggml/src/gguf.cpp; sourceTree = "<group>"; };
+		433188B92D3A18A400E3FE79 /* gguf.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = gguf.h; path = ../../../ggml/include/gguf.h; sourceTree = "<group>"; };
+		437B63E02D36280C002A49EC /* ggml-cpu-traits.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-traits.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-traits.h"; sourceTree = "<group>"; };
+		437B63E12D36280C002A49EC /* ggml-cpu-traits.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu-traits.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-traits.cpp"; sourceTree = "<group>"; };
 		7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
 		7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
 		7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
@ -67,7 +103,6 @@
 		7FE342492A0C3FA20015A058 /* whisper-encoder-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder-impl.h"; sourceTree = "<group>"; };
 		7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-decoder-impl.m"; sourceTree = "<group>"; };
 		7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = "ggml-base.en-encoder.mlmodelc"; path = "../../../models/ggml-base.en-encoder.mlmodelc"; sourceTree = "<group>"; };
-		DDE3609E2D87EA8C004EA223 /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../../build-apple/whisper.xcframework"; sourceTree = "<group>"; };
 /* End PBXFileReference section */

 /* Begin PBXFrameworksBuildPhase section */
@ -75,7 +110,6 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
-				DDE3609F2D87EA8C004EA223 /* whisper.xcframework in Frameworks */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@ -86,7 +120,6 @@
 			isa = PBXGroup;
 			children = (
 				18627C7829052BDF00BD2A04 /* whisper.objc */,
-				DDE3609D2D87EA8C004EA223 /* Frameworks */,
 				18627C7729052BDF00BD2A04 /* Products */,
 			);
 			sourceTree = "<group>";
@ -102,9 +135,40 @@
 		18627C7829052BDF00BD2A04 /* whisper.objc */ = {
 			isa = PBXGroup;
 			children = (
+				433188B92D3A18A400E3FE79 /* gguf.h */,
+				433188B72D3A187C00E3FE79 /* gguf.cpp */,
+				18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */,
+				18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */,
+				18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.cpp */,
+				18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */,
+				437B63E02D36280C002A49EC /* ggml-cpu-traits.h */,
+				437B63E12D36280C002A49EC /* ggml-cpu-traits.cpp */,
+				18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */,
+				18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */,
+				18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */,
+				18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */,
+				18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */,
+				18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
+				18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
+				18A275FF2C2A9563001C8D37 /* ggml-common.h */,
+				18A275FE2C2A94DE001C8D37 /* ggml-metal.h */,
+				18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
+				18ABE1572AF556340044A204 /* ggml-backend.cpp */,
+				18ABE1552AF556340044A204 /* ggml-backend.h */,
+				18ABE1582AF556340044A204 /* ggml-impl.h */,
+				18ABE1592AF556340044A204 /* ggml-quants.c */,
+				18ABE1542AF556340044A204 /* ggml-quants.h */,
+				1844471D2AB2195F007D6BFE /* ggml-metal.metal */,
+				1844471B2AB21655007D6BFE /* ggml-metal.m */,
+				184447182AB211A2007D6BFE /* ggml-alloc.c */,
+				184447192AB211A2007D6BFE /* ggml-alloc.h */,
 				7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */,
 				7FE342442A0C3FA20015A058 /* coreml */,
 				18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */,
+				18627C9729052C6600BD2A04 /* ggml.h */,
+				18627C9529052C5800BD2A04 /* ggml.c */,
+				18627C9329052C4900BD2A04 /* whisper.cpp */,
+				18627C9229052C2B00BD2A04 /* whisper.h */,
 				18627C7929052BDF00BD2A04 /* AppDelegate.h */,
 				18627C7A29052BDF00BD2A04 /* AppDelegate.m */,
 				18627C7C29052BDF00BD2A04 /* SceneDelegate.h */,
@ -134,14 +198,6 @@
 			path = ../../../src/coreml;
 			sourceTree = "<group>";
 		};
-		DDE3609D2D87EA8C004EA223 /* Frameworks */ = {
-			isa = PBXGroup;
-			children = (
-				DDE3609E2D87EA8C004EA223 /* whisper.xcframework */,
-			);
-			name = Frameworks;
-			sourceTree = "<group>";
-		};
 /* End PBXGroup section */

 /* Begin PBXNativeTarget section */
@ -153,7 +209,6 @@
 				18627C7329052BDF00BD2A04 /* Frameworks */,
 				18627C7429052BDF00BD2A04 /* Resources */,
 				184447202AB21B25007D6BFE /* Copy Files */,
-				DDE360A12D87EA8C004EA223 /* Embed Frameworks */,
 			);
 			buildRules = (
 			);
@ -201,6 +256,7 @@
 			isa = PBXResourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */,
 				18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
 				7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */,
 				18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
@ -217,10 +273,25 @@
 			buildActionMask = 2147483647;
 			files = (
 				18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
+				18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */,
 				7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */,
+				18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
+				437B63E22D36280C002A49EC /* ggml-cpu-traits.cpp in Sources */,
+				18627C9629052C5800BD2A04 /* ggml.c in Sources */,
 				18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
+				7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
+				18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */,
+				18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */,
+				1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
+				18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.cpp in Sources */,
+				18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */,
+				18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
+				18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
 				18627C8C29052BE000BD2A04 /* main.m in Sources */,
 				18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
+				433188B82D3A187C00E3FE79 /* gguf.cpp in Sources */,
+				18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
+				1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
 				7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
@ -298,7 +369,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = "";
+				HEADER_SEARCH_PATHS = ../../../ggml/src/;
 				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
 				MTL_FAST_MATH = YES;
@ -352,7 +423,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = "";
+				HEADER_SEARCH_PATHS = ../../../ggml/src/;
 				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				MTL_FAST_MATH = YES;
@ -370,13 +441,12 @@
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
-				CLANG_CXX_LANGUAGE_STANDARD = "c++17";
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				DEVELOPMENT_TEAM = P8JZH34X63;
 				GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
 				GENERATE_INFOPLIST_FILE = YES;
-				HEADER_SEARCH_PATHS = "";
+				HEADER_SEARCH_PATHS = ../../../ggml/src/;
 				INFOPLIST_FILE = whisper.objc/Info.plist;
 				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
 				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
@ -389,12 +459,10 @@
 				);
 				MARKETING_VERSION = 1.0;
 				MTL_HEADER_SEARCH_PATHS = "";
-				OTHER_CFLAGS = "-DGGML_USE_CPU=ON";
 				PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_EMIT_LOC_STRINGS = YES;
 				TARGETED_DEVICE_FAMILY = "1,2";
-				WARNING_CFLAGS = "-Wno-quoted-include-in-framework-header";
 			};
 			name = Debug;
 		};
@ -403,13 +471,12 @@
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
-				CLANG_CXX_LANGUAGE_STANDARD = "c++17";
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				DEVELOPMENT_TEAM = P8JZH34X63;
 				GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
 				GENERATE_INFOPLIST_FILE = YES;
-				HEADER_SEARCH_PATHS = "";
+				HEADER_SEARCH_PATHS = ../../../ggml/src/;
 				INFOPLIST_FILE = whisper.objc/Info.plist;
 				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
 				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
@ -422,12 +489,10 @@
 				);
 				MARKETING_VERSION = 1.0;
 				MTL_HEADER_SEARCH_PATHS = "";
-				OTHER_CFLAGS = "-DGGML_USE_CPU=ON";
 				PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_EMIT_LOC_STRINGS = YES;
 				TARGETED_DEVICE_FAMILY = "1,2";
-				WARNING_CFLAGS = "-Wno-quoted-include-in-framework-header";
 			};
 			name = Release;
 		};
--- a/examples/whisper.objc/whisper.objc/ViewController.m
+++ b/examples/whisper.objc/whisper.objc/ViewController.m
@ -6,8 +6,8 @@
 //

 #import "ViewController.h"
-#import <whisper/whisper.h>

+#import "whisper.h"

 #define NUM_BYTES_PER_BUFFER 16*1024

@ -83,19 +83,6 @@ void AudioInputCallback(void * inUserData,
        stateInp.n_samples = 0;
        stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
        stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));
-        // Set up audio session
-        NSError *error = nil;
-
-        [[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryRecord error:&error];
-        if (error) {
-            NSLog(@"Error setting audio session category: %@", error);
-        }
-
-        [[AVAudioSession sharedInstance] setActive:YES error:&error];
-        if (error) {
-            NSLog(@"Error activating audio session: %@", error);
-        }
-
    }

    stateInp.isTranscribing = false;
--- a/examples/whisper.swiftui/README.md
+++ b/examples/whisper.swiftui/README.md
@ -1,29 +1,7 @@
-# whisper.cpp/examples/whisper.swiftui
-
 A sample SwiftUI app using [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions.
 See also: [whisper.objc](https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.objc).

-### Building
- First whisper.cpp need to be built and a XCFramework needs to be created. This can be done by running
- the following script from the whisper.cpp project root:
- ```console
- $ ./build-xcframework.sh
- ```
-
-Note: if you get the error "iphoneos is not an iOS SDK" then you probably need to run this command first:
-```console
-sudo xcode-select -switch /Applications/Xcode.app/Contents/Developer
-```
-
- Open `whisper.swiftui.xcodeproj` project in Xcode and you should be able to build and run the app on
- a simulator or a real device.
-
- To use the framework with a different project, the XCFramework can be added to the project by
- adding `build-apple/whisper.xcframework` by dragging and dropping it into the project navigator, or
- by manually selecting the framework in the "Frameworks, Libraries, and Embedded Content" section
- of the project settings.
-
-### Usage
+**Usage**:

 1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1]
 2. Add the model to `whisper.swiftui.demo/Resources/models` **via Xcode**.
@ -33,19 +11,6 @@ sudo xcode-select -switch /Applications/Xcode.app/Contents/Developer

 **Note:** Pay attention to the folder path: `whisper.swiftui.demo/Resources/models` is the appropriate directory to place resources whilst `whisper.swiftui.demo/Models` is related to actual code.

-### Core ML support
-1. Follow all the steps in the `Usage` section, including adding the ggml model file.
-2. Follow the [`Core ML support` section of readme](../../README.md#core-ml-support) to convert the
-model.
-3. Add the Core ML model (`models/ggml-base.en-encoder.mlmodelc/`) to `whisper.swiftui.demo/Resources/models` **via Xcode**.
-
-When the example starts running you should now see that it is using the Core ML model:
-```console
-whisper_init_state: loading Core ML model from '/Library/Developer/CoreSimulator/Devices/25E8C27D-0253-4281-AF17-C3F2A4D1D8F4/data/Containers/Bundle/Application/3ADA7D59-7B9C-43B4-A7E1-A87183FC546A/whisper.swiftui.app/models/ggml-base.en-encoder.mlmodelc'
-whisper_init_state: first run on a device may take a while ...
-whisper_init_state: Core ML model loaded
-```
-
 [^1]: I recommend the tiny, base or small models for running on an iOS device.

 [^2]: The `Release` build can boost performance of transcription. In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project.
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
@ -17,26 +17,11 @@
 		0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
 		0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
 		0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
-		5B3454FF2D8178F80005A3BC /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5B3454FE2D8178F80005A3BC /* whisper.xcframework */; };
-		5B3455002D8178F80005A3BC /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 5B3454FE2D8178F80005A3BC /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
 		7F79E0EE2CE0A78000ACD7BF /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */; };
 		7F79E0F02CE0C6F700ACD7BF /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0EF2CE0C6F700ACD7BF /* Model.swift */; };
+		E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
 /* End PBXBuildFile section */

-/* Begin PBXCopyFilesBuildPhase section */
-		5B3455012D8178F80005A3BC /* Embed Frameworks */ = {
-			isa = PBXCopyFilesBuildPhase;
-			buildActionMask = 2147483647;
-			dstPath = "";
-			dstSubfolderSpec = 10;
-			files = (
-				5B3455002D8178F80005A3BC /* whisper.xcframework in Embed Frameworks */,
-			);
-			name = "Embed Frameworks";
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXCopyFilesBuildPhase section */
-
 /* Begin PBXFileReference section */
 		0A8E48FF2954B3F100704C1B /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
 		0AA751462953AC2E001EE061 /* samples */ = {isa = PBXFileReference; lastKnownFileType = folder; path = samples; sourceTree = "<group>"; };
@ -50,9 +35,9 @@
 		0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
 		0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
 		0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
-		5B3454FE2D8178F80005A3BC /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../../build-apple/whisper.xcframework"; sourceTree = "<group>"; };
 		7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
 		7F79E0EF2CE0C6F700ACD7BF /* Model.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
+		E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
 /* End PBXFileReference section */

 /* Begin PBXFrameworksBuildPhase section */
@ -60,7 +45,7 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
-				5B3454FF2D8178F80005A3BC /* whisper.xcframework in Frameworks */,
+				E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@ -97,6 +82,7 @@
 		0AAC5D8E29539CCF003032C3 = {
 			isa = PBXGroup;
 			children = (
+				E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
 				0A8E48FF2954B3F100704C1B /* README.md */,
 				0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
 				0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
@ -155,7 +141,6 @@
 		E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
 			isa = PBXGroup;
 			children = (
-				5B3454FE2D8178F80005A3BC /* whisper.xcframework */,
 			);
 			name = Frameworks;
 			sourceTree = "<group>";
@ -170,7 +155,6 @@
 				0AAC5D9329539CCF003032C3 /* Sources */,
 				0AAC5D9429539CCF003032C3 /* Frameworks */,
 				0AAC5D9529539CCF003032C3 /* Resources */,
-				5B3455012D8178F80005A3BC /* Embed Frameworks */,
 			);
 			buildRules = (
 			);
@ -178,6 +162,7 @@
 			);
 			name = whisper.swiftui;
 			packageProductDependencies = (
+				E3F92DC42AFA8E3800A6A9D4 /* whisper */,
 			);
 			productName = WhisperCppDemo;
 			productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
@ -471,6 +456,13 @@
 			defaultConfigurationName = Release;
 		};
 /* End XCConfigurationList section */
+
+/* Begin XCSwiftPackageProductDependency section */
+		E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
+			isa = XCSwiftPackageProductDependency;
+			productName = whisper;
+		};
+/* End XCSwiftPackageProductDependency section */
 	};
 	rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
 }
--- a/examples/whisper.wasm/CMakeLists.txt
+++ b/examples/whisper.wasm/CMakeLists.txt
@ -32,9 +32,8 @@ set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE_STRICT=0 \
-    -s INITIAL_MEMORY=512MB \
-    -s MAXIMUM_MEMORY=2000MB \
-    -s ALLOW_MEMORY_GROWTH=1 \
+    -s INITIAL_MEMORY=2000MB \
+    -s TOTAL_MEMORY=2000MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
--- a/examples/whisper.wasm/README.md
+++ b/examples/whisper.wasm/README.md
@ -22,7 +22,7 @@ audio is limited to 120 seconds.

 ## Live demo

-Link: https://ggerganov.github.io/whisper.cpp/
+Link: https://whisper.ggerganov.com

 ![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png)

@ -35,17 +35,7 @@ cd whisper.cpp
 mkdir build-em && cd build-em
 emcmake cmake ..
 make -j
-```
-The example can then be started by running a local HTTP server:
-```console
-python3 examples/server.py
-```
-And then opening a browser to the following URL:
-http://localhost:8000/whisper.wasm

-To run the example in a different server, you need to copy the following files
-to the server's HTTP path:
-```
 # copy the produced page to your HTTP path
 cp bin/whisper.wasm/*    /path/to/html/
 cp bin/libmain.worker.js /path/to/html/
--- a/examples/whisper.wasm/index-tmpl.html
+++ b/examples/whisper.wasm/index-tmpl.html
@ -24,8 +24,6 @@
                overflow-x: scroll;
            }
        </style>
-        <script src="coi-serviceworker.js"></script>
-        <link rel="icon" href="data:,">
    </head>
    <body>
        <div id="main-container">
@ -49,9 +47,11 @@
                </ul>

            <b>More examples:</b>
-                <a href="bench.wasm/">bench</a> |
-                <a href="stream.wasm">stream</a> |
-                <a href="command.wasm/">command</a> |
+                <a href="https://whisper.ggerganov.com/">main</a> |
+                <a href="https://whisper.ggerganov.com/bench">bench</a> |
+                <a href="https://whisper.ggerganov.com/stream">stream</a> |
+                <a href="https://whisper.ggerganov.com/command">command</a> |
+                <a href="https://whisper.ggerganov.com/talk">talk</a> |

            <hr>

@ -614,7 +614,7 @@
            var nthreads = 8;

            function changeThreads(value) {
-                nthreads = parseInt(value, 10);
+                nthreads = value;
                document.getElementById('threads-value').innerHTML = nthreads;
            }

--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@ -100,17 +100,11 @@ else()
    set(INS_ENB ON)
 endif()

-message(DEBUG "GGML_NATIVE         : ${GGML_NATIVE}")
-message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
-message(DEBUG "INS_ENB             : ${INS_ENB}")
-
 option(GGML_CPU_HBM          "ggml: use memkind for CPU HBM" OFF)
 option(GGML_CPU_AARCH64      "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
-option(GGML_CPU_KLEIDIAI     "ggml: use KleidiAI optimized kernels if applicable" OFF)
 option(GGML_AVX              "ggml: enable AVX"              ${INS_ENB})
 option(GGML_AVX_VNNI         "ggml: enable AVX-VNNI"         OFF)
 option(GGML_AVX2             "ggml: enable AVX2"             ${INS_ENB})
-option(GGML_BMI2             "ggml: enable BMI2"             ${INS_ENB})
 option(GGML_AVX512           "ggml: enable AVX512F"          OFF)
 option(GGML_AVX512_VBMI      "ggml: enable AVX512-VBMI"      OFF)
 option(GGML_AVX512_VNNI      "ggml: enable AVX512-VNNI"      OFF)
@ -127,12 +121,9 @@ endif()
 option(GGML_LASX             "ggml: enable lasx"             ON)
 option(GGML_LSX              "ggml: enable lsx"              ON)
 option(GGML_RVV              "ggml: enable rvv"              ON)
-option(GGML_RV_ZFH           "ggml: enable riscv zfh"        OFF)
-option(GGML_VXE              "ggml: enable vxe"              ON)

 option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
-set(GGML_CPU_ARM_ARCH        "" CACHE STRING "ggml: CPU architecture for ARM")
-set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
+set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")


 if (WIN32)
@ -159,17 +150,12 @@ set   (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
                                            "ggml: max. batch size for using peer access")
 option(GGML_CUDA_NO_PEER_COPY               "ggml: do not use peer to peer copies"            OFF)
 option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"                OFF)
-option(GGML_CUDA_FA                         "ggml: compile ggml FlashAttention CUDA kernels"  ON)
 option(GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"     OFF)
 option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"          ${GGML_CUDA_GRAPHS_DEFAULT})
-set   (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
-                                            "ggml: cuda link binary compression mode; requires cuda 12.8+")
-set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")

 option(GGML_HIP                             "ggml: use HIP"                                   OFF)
 option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF)
 option(GGML_HIP_NO_VMM                      "ggml: do not try to use HIP VMM"                 ON)
-option(GGML_HIP_ROCWMMA_FATTN               "ggml: enable rocWMMA for FlashAttention"         OFF)
 option(GGML_HIP_UMA                         "ggml: use HIP unified memory architecture"       OFF)
 option(GGML_VULKAN                          "ggml: use Vulkan"                                OFF)
 option(GGML_VULKAN_CHECK_RESULTS            "ggml: run Vulkan op checks"                      OFF)
@ -192,7 +178,6 @@ option(GGML_OPENMP                          "ggml: use OpenMP"
 option(GGML_RPC                             "ggml: use RPC"                                   OFF)
 option(GGML_SYCL                            "ggml: use SYCL"                                  OFF)
 option(GGML_SYCL_F16                        "ggml: use 16 bit floats for sycl calculations"   OFF)
-option(GGML_SYCL_GRAPH                      "ggml: enable graphs in the SYCL backend"         ON)
 set   (GGML_SYCL_TARGET "INTEL" CACHE STRING
                                            "ggml: sycl target device")
 set   (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
@ -202,8 +187,6 @@ option(GGML_OPENCL                          "ggml: use OpenCL"
 option(GGML_OPENCL_PROFILING                "ggml: use OpenCL profiling (increases overhead)" OFF)
 option(GGML_OPENCL_EMBED_KERNELS            "ggml: embed kernels"                             ON)
 option(GGML_OPENCL_USE_ADRENO_KERNELS       "ggml: use optimized kernels for Adreno"          ON)
-set   (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
-                                            "gmml: OpenCL API version to target")

 # toolchain for vulkan-shaders-gen
 set   (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@ -226,8 +209,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)

 find_package(Threads REQUIRED)

-include(GNUInstallDirs)
-
 #
 # build the library
 #
@ -251,6 +232,7 @@ endif ()
 # install
 #

+include(GNUInstallDirs)
 include(CMakePackageConfigHelpers)

 # all public headers
@ -261,7 +243,6 @@ set(GGML_PUBLIC_HEADERS
    include/ggml-backend.h
    include/ggml-blas.h
    include/ggml-cann.h
-    include/ggml-cpp.h
    include/ggml-cuda.h
    include/ggml-kompute.h
    include/ggml-opt.h
--- a/ggml/cmake/common.cmake
+++ b/ggml/cmake/common.cmake
@ -1,26 +0,0 @@
-function(ggml_get_flags CCID CCVER)
-    set(C_FLAGS "")
-    set(CXX_FLAGS "")
-
-    if (CCID MATCHES "Clang")
-        set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return)
-        set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
-
-        if (
-            (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
-            (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
-        )
-            list(APPEND C_FLAGS -Wdouble-promotion)
-        endif()
-    elseif (CCID STREQUAL "GNU")
-        set(C_FLAGS   -Wdouble-promotion)
-        set(CXX_FLAGS -Wno-array-bounds)
-
-        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
-            list(APPEND CXX_FLAGS -Wextra-semi)
-        endif()
-    endif()
-
-    set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE)
-    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
-endfunction()
--- a/ggml/cmake/ggml-config.cmake.in
+++ b/ggml/cmake/ggml-config.cmake.in
@ -5,7 +5,7 @@

 set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
 set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
-#set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
+set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")

 find_package(Threads REQUIRED)

@ -112,7 +112,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})

    string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
    if(is_cpu_variant)
-        list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
+        list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
        set_target_properties(ggml::${_ggml_backend}
           PROPERTIES
               INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
@ -124,7 +124,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
        endif()

    else()
-        list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
+        list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
        set_target_properties(ggml::${_ggml_backend}
            PROPERTIES
                INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
@ -139,11 +139,6 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
    list(APPEND _ggml_all_targets ggml::${_ggml_backend})
 endforeach()

-list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
-set_target_properties(ggml::ggml
-    PROPERTIES
-        INTERFACE_LINK_LIBRARIES "${GGML_INTERFACE_LINK_LIBRARIES}")
-
 add_library(ggml::all INTERFACE IMPORTED)
 set_target_properties(ggml::all
    PROPERTIES
--- a/ggml/include/ggml-alloc.h
+++ b/ggml/include/ggml-alloc.h
@ -19,7 +19,7 @@ struct ggml_tallocr {
 };

 GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
-GGML_API enum ggml_status    ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+GGML_API void                ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);

 // Graph allocator
 /*
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@ -56,7 +56,7 @@ extern "C" {
    GGML_API void                           ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
    GGML_API void *                         ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
    GGML_API size_t                         ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
-    GGML_API enum ggml_status               ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API void                           ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
    GGML_API size_t                         ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
    GGML_API size_t                         ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer);
    GGML_API size_t                         ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@ -342,8 +342,8 @@ extern "C" {
    GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);

    // Tensor initialization
-    GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
-    GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
+    GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+    GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);

    // CPU buffer types are always available
    GGML_API ggml_backend_buffer_t      ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
--- a/ggml/include/ggml-cpu.h
+++ b/ggml/include/ggml-cpu.h
@ -8,7 +8,7 @@ extern "C" {
 #endif

    // the compute plan that needs to be prepared for ggml_graph_compute()
-    // since https://github.com/ggml-org/ggml/issues/287
+    // since https://github.com/ggerganov/ggml/issues/287
    struct ggml_cplan {
        size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
@ -80,7 +80,6 @@ extern "C" {
    GGML_BACKEND_API int ggml_cpu_has_avx        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
    GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
-    GGML_BACKEND_API int ggml_cpu_has_bmi2       (void);
    GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
    GGML_BACKEND_API int ggml_cpu_has_fma        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
@ -96,11 +95,9 @@ extern "C" {
    GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
    GGML_BACKEND_API int ggml_cpu_has_sve        (void);
    GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void);  // sve vector length in bytes
-    GGML_BACKEND_API int ggml_cpu_has_sme        (void);
    // other
    GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
    GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
-    GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
    GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
    GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);

--- a/ggml/include/ggml-metal.h
+++ b/ggml/include/ggml-metal.h
@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);

 GGML_DEPRECATED(
        GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
-        "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
+        "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");

 GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);

--- a/ggml/include/ggml-rpc.h
+++ b/ggml/include/ggml-rpc.h
@ -17,9 +17,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c

 GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);

-GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
-                                                    const char * cache_dir,
-                                                    size_t free_mem, size_t total_mem);
+GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);

 GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);

--- a/ggml/include/ggml-vulkan.h
+++ b/ggml/include/ggml-vulkan.h
@ -10,6 +10,8 @@ extern "C" {
 #define GGML_VK_NAME "Vulkan"
 #define GGML_VK_MAX_DEVICES 16

+GGML_BACKEND_API void ggml_vk_instance_init(void);
+
 // backend API
 GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);

--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@ -198,7 +198,7 @@

 #ifndef __GNUC__
 #    define GGML_ATTRIBUTE_FORMAT(...)
-#elif defined(__MINGW32__) && !defined(__clang__)
+#elif defined(__MINGW32__)
 #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
 #else
 #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@ -454,7 +454,6 @@ extern "C" {
        GGML_OP_RMS_NORM,
        GGML_OP_RMS_NORM_BACK,
        GGML_OP_GROUP_NORM,
-        GGML_OP_L2_NORM,

        GGML_OP_MUL_MAT,
        GGML_OP_MUL_MAT_ID,
@ -503,7 +502,6 @@ extern "C" {
        GGML_OP_ADD_REL_POS,
        GGML_OP_RWKV_WKV6,
        GGML_OP_GATED_LINEAR_ATTN,
-        GGML_OP_RWKV_WKV7,

        GGML_OP_UNARY,

@ -1097,18 +1095,6 @@ extern "C" {
            int                   n_groups,
            float                 eps);

-    // l2 normalize along rows
-    // used in rwkv v7
-    GGML_API struct ggml_tensor * ggml_l2_norm(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            float                 eps);
-
-    GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            float                 eps);
-
    // a - x
    // b - dy
    GGML_API struct ggml_tensor * ggml_rms_norm_back(
@ -1791,11 +1777,11 @@ extern "C" {

 #define GGML_KQ_MASK_PAD 64

-    // q:    [n_embd_k, n_batch,     n_head,    1]
-    // k:    [n_embd_k, n_kv,        n_head_kv, 1]
-    // v:    [n_embd_v, n_kv,        n_head_kv, 1] !! not transposed !!
-    // mask: [n_kv,     n_batch_pad, 1,         1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
-    // res:  [n_embd_v, n_head,      n_batch,   1] !! permuted !!
+    // q:    [n_embd, n_batch,     n_head,    1]
+    // k:    [n_embd, n_kv,        n_head_kv, 1]
+    // v:    [n_embd, n_kv,        n_head_kv, 1] !! not transposed !!
+    // mask: [n_kv,   n_batch_pad, 1,         1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
+    // res:  [n_embd, n_head,      n_batch,   1] !! permuted !!
    GGML_API struct ggml_tensor * ggml_flash_attn_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,
@ -1904,16 +1890,6 @@ extern "C" {
            struct ggml_tensor  * state,
            float scale);

-    GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * r,
-            struct ggml_tensor  * w,
-            struct ggml_tensor  * k,
-            struct ggml_tensor  * v,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * state);
-
    // custom operators

    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@ -2164,11 +2140,7 @@ extern "C" {
 #        define GGML_RESTRICT
 #    endif
 #else
-#    if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
-#        define GGML_RESTRICT __restrict
-#    else
-#        define GGML_RESTRICT restrict
-#    endif
+#    define GGML_RESTRICT restrict
 #endif
    typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
    typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@ -1,5 +1,4 @@
 include(CheckCXXCompilerFlag)
-include("../cmake/common.cmake")

 add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})

@ -25,6 +24,33 @@ if (NOT MSVC)
    endif()
 endif()

+function(ggml_get_flags CCID CCVER)
+    set(C_FLAGS "")
+    set(CXX_FLAGS "")
+
+    if (CCID MATCHES "Clang")
+        set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return)
+        set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
+
+        if (
+            (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
+            (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
+        )
+            list(APPEND C_FLAGS -Wdouble-promotion)
+        endif()
+    elseif (CCID STREQUAL "GNU")
+        set(C_FLAGS   -Wdouble-promotion)
+        set(CXX_FLAGS -Wno-array-bounds)
+
+        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
+            list(APPEND CXX_FLAGS -Wextra-semi)
+        endif()
+    endif()
+
+    set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE)
+    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
+endfunction()
+
 if (GGML_FATAL_WARNINGS)
    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        list(APPEND C_FLAGS   -Werror)
@ -65,7 +91,7 @@ if (GGML_LTO)
    endif()
 endif()

-if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
+if (GGML_CCACHE)
    find_program(GGML_CCACHE_FOUND ccache)
    find_program(GGML_SCCACHE_FOUND sccache)

@ -76,11 +102,7 @@ if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAU
            set(GGML_CCACHE_VARIANT sccache)
        endif()
        # TODO: should not be set globally
-        if (GGML_SYCL AND GGML_CCACHE_FOUND AND WIN32)
-            set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache compiler_type=icl")
-        else ()
-            set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
-        endif ()
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
        set(ENV{CCACHE_SLOPPINESS} time_macros)
        message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
    else()
@ -204,9 +226,6 @@ add_library(ggml-base
            gguf.cpp)

 target_include_directories(ggml-base PRIVATE .)
-if (GGML_BACKEND_DL)
-    target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
-endif()

 add_library(ggml
            ggml-backend-reg.cpp)
@ -214,7 +233,7 @@ add_library(ggml
 target_link_libraries(ggml PUBLIC ggml-base)

 if (CMAKE_SYSTEM_NAME MATCHES "Linux")
-    target_link_libraries(ggml PRIVATE dl stdc++fs)
+    target_link_libraries(ggml PRIVATE dl)
 endif()

 function(ggml_add_backend_library backend)
@ -267,7 +286,7 @@ function(ggml_add_cpu_backend_variant tag_name)
    set(GGML_CPU_TAG_NAME ${tag_name})
    # other: OPENMP LLAMAFILE CPU_HBM
    foreach (feat NATIVE
-                  AVX AVX2 BMI2 AVX_VNNI FMA F16C
+                  AVX AVX2 AVX_VNNI FMA F16C
                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
                  AMX_TILE AMX_INT8 AMX_BF16)
        set(GGML_${feat} OFF)
@ -287,13 +306,13 @@ if (GGML_CPU_ALL_VARIANTS)
        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
    endif()
    ggml_add_cpu_backend_variant(sandybridge    AVX)
-    ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 BMI2 FMA)
-    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 BMI2 FMA AVX512)
-    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
-    ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 BMI2 FMA AVX_VNNI)
+    ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 FMA)
+    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
+    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+    ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
    if (NOT MSVC)
        # MSVC doesn't support AMX
-        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
    endif()
 elseif (GGML_CPU)
    ggml_add_cpu_backend_variant_impl("")
@ -329,10 +348,6 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
    target_link_libraries(ggml-base PRIVATE dl)
 endif()

-if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
-    target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
-endif()
-
 if (BUILD_SHARED_LIBS)
    foreach (target ggml-base ggml)
        set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
    return talloc;
 }

-enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
+void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
    size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
    size = GGML_PAD(size, talloc->alignment);

@ -104,7 +104,7 @@ enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_te

    assert(((uintptr_t)addr % talloc->alignment) == 0);

-    return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+    ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
 }

 // dynamic tensor allocator
@ -933,51 +933,42 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {

 // utils

-static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
-    for (size_t i = 0; i < *n_buffers; i++) {
-        ggml_backend_buffer_free((*buffers)[i]);
-    }
-    free(*buffers);
-}
-
 static bool alloc_tensor_range(struct ggml_context * ctx,
        struct ggml_tensor * first, struct ggml_tensor * last,
        ggml_backend_buffer_type_t buft, size_t size,
        ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
-
    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
    if (buffer == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
-        free_buffers(buffers, n_buffers);
+#ifndef NDEBUG
+        GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+#endif
+        for (size_t i = 0; i < *n_buffers; i++) {
+            ggml_backend_buffer_free((*buffers)[i]);
+        }
+        free(*buffers);
        return false;
    }

-    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
-    (*buffers)[(*n_buffers)++] = buffer;
-
    struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);

    for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
-        enum ggml_status status = GGML_STATUS_SUCCESS;
        if (t->data == NULL) {
            if (t->view_src == NULL) {
-                status = ggml_tallocr_alloc(&tallocr, t);
+                ggml_tallocr_alloc(&tallocr, t);
            } else if (t->buffer == NULL) {
-                status = ggml_backend_view_init(t);
+                ggml_backend_view_init(t);
            }
        } else {
            if (t->view_src != NULL && t->buffer == NULL) {
                // view of a pre-allocated tensor
-                status = ggml_backend_view_init(t);
+                ggml_backend_view_init(t);
            }
        }
-        if (status != GGML_STATUS_SUCCESS) {
-            GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
-            free_buffers(buffers, n_buffers);
-            return false;
-        }
    }

+    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+    (*buffers)[(*n_buffers)++] = buffer;
+
    return true;
 }

@ -998,7 +989,19 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
        }

-        if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
+        if (this_size > max_size) {
+            GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
+                    __func__, t->name,
+                    ggml_backend_buft_name(buft),
+                    this_size, max_size);
+            for (size_t i = 0; i < n_buffers; i++) {
+                ggml_backend_buffer_free(buffers[i]);
+            }
+            free(buffers);
+            return NULL;
+        }
+
+        if ((cur_buf_size + this_size) > max_size) {
            // allocate tensors in the current buffer
            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                return NULL;
--- a/ggml/src/ggml-backend-impl.h
+++ b/ggml/src/ggml-backend-impl.h
@ -44,7 +44,7 @@ extern "C" {
        // base address of the buffer
        void *       (*get_base)     (ggml_backend_buffer_t buffer);
        // (optional) initialize a tensor in the buffer (eg. add tensor extras)
-        enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+        void         (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
        // tensor data access
        void         (*memset_tensor)(ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size);
        void         (*set_tensor)   (ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Judd	00ddb10fe2	select utf8 codepage on windows	2025-02-19 17:00:39 +08:00
Judd	b3a6018bbf	fix building with MSVC + SDL2	2025-02-19 14:43:42 +08:00