Compare commits

..

2 Commits

Author SHA1 Message Date
00ddb10fe2 select utf8 codepage on windows 2025-02-19 17:00:39 +08:00
b3a6018bbf fix building with MSVC + SDL2 2025-02-19 14:43:42 +08:00
298 changed files with 20457 additions and 128088 deletions

View File

@ -19,12 +19,7 @@ on:
- ggml/**/*.m
- ggml/**/*.metal
- scripts/get-flags.mk
- examples/common.h
- examples/common.cpp
- examples/common-whisper.h
- examples/common-whisper.cpp
- examples/stb_vorbis.c
- examples/miniaudio.h
- examples/dr_wav.h
pull_request:
paths:
- bindings/ruby/**
@ -44,12 +39,7 @@ on:
- ggml/**/*.m
- ggml/**/*.metal
- scripts/get-flags.mk
- examples/common.h
- examples/common.cpp
- examples/common-whisper.h
- examples/common-whisper.cpp
- examples/stb_vorbis.c
- examples/miniaudio.h
- examples/dr_wav.h
jobs:
ubuntu-22:

View File

@ -6,81 +6,17 @@ on:
- master
pull_request:
types: [opened, synchronize, reopened]
workflow_dispatch:
inputs:
create_release:
description: 'Create new release'
required: true
type: boolean
pre_release_tag:
description: 'Pre-release tag name'
required: false
type: string
run_type:
description: 'Workflow type to run'
required: true
type: choice
options:
- full-ci
- release-only
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: write # for creating release
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
ubuntu_image: "ubuntu:22.04"
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
jobs:
determine-tag:
runs-on: ubuntu-latest
outputs:
tag_name: ${{ steps.tag.outputs.name }}
steps:
- name: Checkout with full history
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER=$(git rev-list --count HEAD)
SHORT_HASH=$(git rev-parse --short=7 HEAD)
CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}"
echo "Raw values:"
echo "BUILD_NUMBER: $BUILD_NUMBER"
echo "SHORT_HASH: $SHORT_HASH"
echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
echo "CUSTOM_TAG: $CUSTOM_TAG"
# Use custom tag if provided
if [[ -n "$CUSTOM_TAG" ]]; then
echo "Using custom tag"
TAG_NAME="${CUSTOM_TAG}"
elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "Using master branch format"
TAG_NAME="b${BUILD_NUMBER}"
else
echo "Using non-master branch format"
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
fi
echo "Final tag name: $TAG_NAME"
echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
ubuntu-22:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -107,8 +43,6 @@ jobs:
cmake --build build --config Release -j $(nproc)'
ubuntu-22-arm64:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -135,8 +69,6 @@ jobs:
cmake --build build --config Release -j $(nproc)'
ubuntu-22-arm-v7:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -163,25 +95,12 @@ jobs:
cmake --build build --config Release -j $(nproc)'
macOS-latest:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: macOS-latest
strategy:
matrix:
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: macOS-latest-swift
evict-old-files: 1d
- name: Dependencies
run: |
brew update
@ -189,38 +108,28 @@ jobs:
- name: Build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DWHISPER_BUILD_EXAMPLES=OFF \
-DWHISPER_BUILD_TESTS=OFF \
-DWHISPER_BUILD_SERVER=OFF \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
cmake -B build
cmake --build build --config Release
freeBSD-latest:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
- name: Build
uses: cross-platform-actions/action@v0.27.0
with:
operating_system: freebsd
version: '14.2'
run: |
sudo pkg update
sudo pkg install -y gmake sdl2 cmake git
cmake -B build
cmake --build build --config Release
# freeBSD-latest:
# runs-on: macos-12
#
# steps:
# - name: Clone
# uses: actions/checkout@v4
#
# - name: Build
# uses: cross-platform-actions/action@v0.24.0
# with:
# operating_system: freebsd
# version: '13.3'
# run: |
# sudo pkg update
# sudo pkg install -y gmake sdl2 cmake
# cmake -B build
# cmake --build build --config Release
ubuntu-22-gcc:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -249,8 +158,6 @@ jobs:
ctest -L gh --output-on-failure'
ubuntu-22-gcc-arm64:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -279,8 +186,6 @@ jobs:
ctest -L gh --output-on-failure'
ubuntu-22-gcc-arm-v7:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -309,8 +214,6 @@ jobs:
ctest -L gh --output-on-failure'
ubuntu-22-clang:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -342,8 +245,6 @@ jobs:
ctest -L gh --output-on-failure'
ubuntu-22-gcc-sanitized:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -372,8 +273,6 @@ jobs:
ctest -L gh --output-on-failure'
ubuntu-22-cmake-sycl:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -424,8 +323,6 @@ jobs:
cmake --build . --config Release -j $(nproc)
ubuntu-22-cmake-sycl-fp16:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -476,8 +373,6 @@ jobs:
cmake --build . --config Release -j $(nproc)
windows-msys2:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: windows-latest
strategy:
@ -522,8 +417,6 @@ jobs:
cmake --build build --config ${{ matrix.build }} -j $(nproc)
windows:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: windows-latest
strategy:
@ -584,8 +477,6 @@ jobs:
path: build/bin/${{ matrix.build }}
windows-blas:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: windows-latest
strategy:
@ -659,8 +550,6 @@ jobs:
path: build/bin/${{ matrix.build }}
windows-cublas:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: windows-2019
strategy:
matrix:
@ -677,134 +566,15 @@ jobs:
- name: Clone repository
uses: actions/checkout@v4
- name: Install Ninja
id: install_ninja
run: |
choco install ninja
- name: Install ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }}
variant: sccache
evict-old-files: 5d
- name: Install Cuda Toolkit 11.8.0
if: ${{ matrix.cuda-toolkit == '11.8.0' }}
run: |
$CUDA_VERSION = ${{ matrix.cuda-toolkit }}
$CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
$CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
# Components versions
$CUDART_VER = "11.8.89"
$NVCC_VER = "11.8.89"
$NVRTC_VER = "11.8.89"
$CUBLAS_VER = "11.8.1.74"
$NVTX_VER = "11.8.86"
$VS_VER = "11.8.86"
$NVPROF_VER = "11.8.87"
$CCCL_VER = "11.8.89"
# Create the directory where the CUDA Toolkit will be installed
mkdir -p $CUDA_TOOLKIT_DIR
# Install unzip to extract the downloaded files
choco install unzip -y
# Download all the required components
curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
# Extract all the downloaded files to the CUDA Toolkit directory
unzip '*.zip' -d $CUDA_TOOLKIT_DIR
# Copy all the extracted files to the main CUDA Toolkit directory
xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
# Visual Studio integration
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y
# Set environment variables
echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Install Cuda Toolkit 12.2.0
if: ${{ matrix.cuda-toolkit == '12.2.0' }}
run: |
$CUDA_VERSION = ${{ matrix.cuda-toolkit }}
$CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
$CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
# Components versions
$CUDART_VER = "12.2.140"
$NVCC_VER = "12.2.140"
$NVRTC_VER = "12.2.140"
$CUBLAS_VER = "12.2.5.6"
$NVTX_VER = "12.2.140"
$PROFILER_VER = "12.2.140"
$VS_VER = "12.2.140"
$NVPROF_VER = "12.2.142"
$CCCL_VER = "12.2.140"
# Create the directory where the CUDA Toolkit will be installed
mkdir -p $CUDA_TOOLKIT_DIR
# Install unzip to extract the downloaded files
choco install unzip -y
# Download all the required components
curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
# Extract all the downloaded files to the CUDA Toolkit directory
unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR
# Copy all the extracted files to the main CUDA Toolkit directory
xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
# Visual Studio integration
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y
# Set environment variables
echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v2
- name: Install CUDA Toolkit
id: cuda-toolkit
uses: Jimver/cuda-toolkit@v0.2.15
with:
cuda: '${{ matrix.cuda-toolkit }}'
- name: Install 7-Zip
run: choco install 7zip -y
@ -816,30 +586,25 @@ jobs:
echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
- name: Install cmake
run: choco install cmake
- name: Configure CMake
shell: cmd
run: |
cmake -S . -B ./build -A ${{ matrix.arch }} ^
-DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
-DGGML_CUDA=${{ matrix.cublas }} ^
-DCMAKE_CUDA_ARCHITECTURES=all ^
-DWHISPER_SDL2=${{ matrix.sdl2 }} ^
-DSDL2_DIR="%SDL2_DIR%"
- name: Build Project
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake --version
where cmake
cmake -S . -B build -G "Ninja Multi-Config" ^
-DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
-DGGML_CUDA=${{ matrix.cublas }} ^
-DWHISPER_SDL2=${{ matrix.sdl2 }} ^
-DSDL2_DIR="%SDL2_DIR%"
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS%
- name: Check sccache status after build
run: |
sccache --show-stats
cd ./build
cmake --build . --config ${{ matrix.build }}
- name: Copy CUDA DLLs
run: |
Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" |
Get-ChildItem "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/" -Filter "*.dll" |
Copy-Item -Destination "build/bin/${{ matrix.build }}"
- name: Copy SDL2.dll
@ -853,8 +618,6 @@ jobs:
path: build/bin/${{ matrix.build }}
emscripten:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
strategy:
@ -878,7 +641,6 @@ jobs:
ios-xcode-build:
runs-on: macos-latest
needs: determine-tag
strategy:
matrix:
@ -909,38 +671,20 @@ jobs:
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
sudo cmake --install . --config Release
- name: xcodebuild for swift package
id: xcodebuild
run: |
./build-xcframework.sh
xcodebuild -scheme whisper-Package -destination 'generic/platform=iOS'
- name: Build objc example
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO build
- name: Build swiftui example
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
- name: Pack artifacts
id: pack_artifacts
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
github.event.inputs.create_release == 'true' ||
github.event.inputs.pre_release_tag != '' }}
run: |
zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework
- name: Upload artifacts
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
github.event.inputs.create_release == 'true' ||
github.event.inputs.pre_release_tag != '' }}
uses: actions/upload-artifact@v4
with:
path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
android:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
steps:
@ -969,30 +713,31 @@ jobs:
cd whisper/examples/whisper.android
./gradlew assembleRelease --no-daemon
android_java:
runs-on: ubuntu-22.04
steps:
- name: Clone
uses: actions/checkout@v4
- name: set up JDK 11
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'temurin'
cache: gradle
- name: Setup Android SDK
uses: android-actions/setup-android@v3
with:
cmdline-tools-version: 9.0
- name: Build
run: |
cd examples/whisper.android.java
chmod +x ./gradlew
./gradlew assembleRelease
# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
# android_java:
# runs-on: ubuntu-22.04
#
# steps:
# - name: Clone
# uses: actions/checkout@v4
#
# - name: set up JDK 11
# uses: actions/setup-java@v4
# with:
# java-version: '11'
# distribution: 'temurin'
# cache: gradle
#
# - name: Setup Android SDK
# uses: android-actions/setup-android@v3
# with:
# cmdline-tools-version: 9.0
#
# - name: Build
# run: |
# cd examples/whisper.android.java
# chmod +x ./gradlew
# ./gradlew assembleRelease
# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
# java:
@ -1039,8 +784,6 @@ jobs:
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
quantize:
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
github.event.inputs.run_type == 'full-ci' }}
runs-on: ubuntu-22.04
steps:
@ -1053,69 +796,3 @@ jobs:
cmake -B build
cmake --build build --config Release
./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
release:
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
github.event.inputs.create_release == 'true' ||
github.event.inputs.pre_release_tag != '' }}
runs-on: ubuntu-latest
needs:
- determine-tag
- ios-xcode-build
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: release
evict-old-files: 1d
# Downloads all the artifacts from the previous jobs
- name: Download artifacts
id: download-artifact
uses: actions/download-artifact@v4
with:
path: ./artifact
- name: Move artifacts
id: move_artifacts
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
- name: Create release
id: create_release
uses: ggml-org/action-create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ needs.determine-tag.outputs.tag_name }}
prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
- name: Upload release
id: upload_release
uses: actions/github-script@v3
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
const path = require('path');
const fs = require('fs');
const release_id = '${{ steps.create_release.outputs.id }}';
for (let file of await fs.readdirSync('./artifact/release')) {
if (path.extname(file) === '.zip') {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
owner: context.repo.owner,
repo: context.repo.repo,
release_id: release_id,
name: file,
data: await fs.readFileSync(`./artifact/release/${file}`)
});
}
}

View File

@ -28,8 +28,6 @@ jobs:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

View File

@ -1,91 +0,0 @@
name: Examples WASM
on:
push:
branches: ["master"]
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
deploy-wasm-github-pages:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Pages
uses: actions/configure-pages@v4
- name: Setup emsdk
uses: mymindstorm/setup-emsdk@v14
- name: Build WASM Examples
# Enable for real build later in whisper.cpp
run: |
mkdir -p build-em && cd build-em
emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
make -j
- name: Create staging directory
run: mkdir -p staging
- name: Create .nojekyll file in staging directory
run: touch staging/.nojekyll
- name: Copy application files
run: |
build_dir=build-em/bin
ls ${build_dir}
# command.wasm
target_dir=staging/command.wasm
mkdir -p ${target_dir}
cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
cp ${build_dir}/libcommand.js ${target_dir}
# bench.wasm
target_dir=staging/bench.wasm
mkdir -p ${target_dir}
cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
cp ${build_dir}/libbench.js ${target_dir}
# stream.wasm
target_dir=staging/stream.wasm
mkdir -p ${target_dir}
cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
cp ${build_dir}/libstream.js ${target_dir}
# whisper.wasm (this will be the main example page)
target_dir=staging
mkdir -p ${target_dir}
cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
cp ${build_dir}/libmain.js ${target_dir}
# Copy Cross-Origin Isolation service worker
cp -v examples/coi-serviceworker.js staging/
- name: List files in staging directory (for debugging)
run: |
echo "Files in staging directory:"
find staging -type f | sort
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
path: ./staging
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

2
.gitignore vendored
View File

@ -58,5 +58,3 @@ cmake-build-debug/
.cxx/
.gradle/
local.properties
.log
.exe

View File

@ -38,13 +38,8 @@ if (EMSCRIPTEN)
# TODO: without these, we get the following error:
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s TOTAL_STACK=5242880")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s TOTAL_STACK=5242880")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
@ -67,8 +62,7 @@ option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings"
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
# build
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
# sanitizers
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
@ -127,15 +121,7 @@ whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
#
if (NOT TARGET ggml)
if (WHISPER_USE_SYSTEM_GGML)
find_package(ggml REQUIRED)
if (NOT ggml_FOUND)
message(FATAL_ERROR "System-installed GGML library not found.")
endif()
add_library(ggml ALIAS ggml::ggml)
else()
add_subdirectory(ggml)
endif()
add_subdirectory(ggml)
# ... otherwise assume ggml is added by a parent CMakeLists.txt
endif()
add_subdirectory(src)

View File

@ -18,6 +18,17 @@ samples:
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
@echo "Converting to 16-bit WAV ..."
@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
@rm samples/*.ogg
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
@rm samples/mm1.wav
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
@rm samples/a13.mp3
@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
@rm samples/diffusion2023-07-03.flac
#
# Models
@ -48,7 +59,7 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 larg
@echo "Running $@ on all samples in ./samples ..."
@echo "==============================================="
@echo ""
@for f in samples/*$(.flac .mp3 .ogg .wav); do \
@for f in samples/*.wav; do \
echo "----------------------------------------------" ; \
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
echo "----------------------------------------------" ; \

19
Package.swift Normal file
View File

@ -0,0 +1,19 @@
// swift-tools-version:5.5
import PackageDescription
let package = Package(
name: "whisper",
platforms: [
.macOS(.v12),
.iOS(.v14),
.watchOS(.v4),
.tvOS(.v14)
],
products: [
.library(name: "whisper", targets: ["whisper"]),
],
targets: [
.systemLibrary(name: "whisper", pkgConfig: "whisper"),
]
)

View File

@ -184,11 +184,11 @@ speed-up - more than x3 faster compared with CPU-only execution. Here are the in
```
- To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
- Python 3.11 is recommended.
- Python 3.10 is recommended.
- MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
- To create an environment, use: `conda create -n py311-whisper python=3.11 -y`
- To activate the environment, use: `conda activate py311-whisper`
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
- To activate the environment, use: `conda activate py310-whisper`
- Generate a Core ML model. For example, to generate a `base.en` model, use:
@ -427,8 +427,7 @@ For detailed instructions on how to use Conan, please refer to the [Conan docume
This is a naive example of performing real-time inference on audio from your microphone.
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly.
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
```bash
cmake -B build -DWHISPER_SDL2=ON

View File

@ -0,0 +1,5 @@
module whisper [system] {
header "whisper.h"
link "whisper"
export *
}

View File

@ -0,0 +1,4 @@
#pragma once
#include <whisper.h>

View File

@ -11,11 +11,11 @@ UNAME_M := $(shell uname -m)
endif
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
BUILD_DIR := build_go
BUILD_DIR := build
MODELS_DIR := models
EXAMPLES_DIR := $(wildcard examples/*)
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
LIBRARY_PATH := $(abspath ../..)
ifeq ($(GGML_CUDA),1)
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
@ -29,10 +29,8 @@ endif
all: clean whisper examples
whisper: mkdir
cmake -S ../.. -B ../../${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF
cmake --build ../../${BUILD_DIR} --target whisper
@echo Build whisper
@${MAKE} -C ../.. libwhisper.a
test: model-small whisper modtidy
ifeq ($(UNAME_S),Darwin)

View File

@ -31,7 +31,7 @@ func main() {
if err != nil {
panic(err)
}
if err := context.Process(samples, nil, nil, nil); err != nil {
if err := context.Process(samples, nil, nil); err != nil {
return err
}

View File

@ -9,23 +9,22 @@ import (
// ContextForSignal returns a context object which is cancelled when a signal
// is received. It returns nil if no signal parameter is provided
func ContextForSignal(signals ...os.Signal) context.Context {
if len(signals) == 0 {
return nil
}
if len(signals) == 0 {
return nil
}
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
ctx, cancel := context.WithCancel(context.Background())
ch := make(chan os.Signal)
ctx, cancel := context.WithCancel(context.Background())
// Send message on channel when signal received
signal.Notify(ch, signals...)
// Send message on channel when signal received
signal.Notify(ch, signals...)
// When any signal is received, call cancel
go func() {
<-ch
cancel()
}()
// When any signal received, call cancel
go func() {
<-ch
cancel()
}()
// Return success
return ctx
// Return success
return ctx
}

View File

@ -9,7 +9,6 @@ import (
"net/url"
"os"
"path/filepath"
"strings"
"syscall"
"time"
)
@ -18,27 +17,14 @@ import (
// CONSTANTS
const (
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
srcExt = ".bin" // Filename extension
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
srcExt = ".bin" // Filename extension
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
)
var (
// The models which will be downloaded, if no model is specified as an argument
modelNames = []string{
"tiny", "tiny-q5_1", "tiny-q8_0",
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
"base", "base-q5_1", "base-q8_0",
"base.en", "base.en-q5_1", "base.en-q8_0",
"small", "small-q5_1", "small-q8_0",
"small.en", "small.en-q5_1", "small.en-q8_0",
"medium", "medium-q5_0", "medium-q8_0",
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
"large-v1",
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
"large-v3", "large-v3-q5_0",
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
}
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
)
var (
@ -58,25 +44,7 @@ var (
func main() {
flag.Usage = func() {
name := filepath.Base(flag.CommandLine.Name())
fmt.Fprintf(flag.CommandLine.Output(), `
Usage: %s [options] [<model>...]
Options:
-out string Specify the output folder where models will be saved.
Default: Current working directory.
-timeout duration Set the maximum duration for downloading a model.
Example: 10m, 1h (default: 30m0s).
-quiet Suppress all output except errors.
Examples:
1. Download a specific model:
%s -out ./models tiny-q8_0
2. Download all models:
%s -out ./models
`, name, name, name)
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
flag.PrintDefaults()
}
flag.Parse()
@ -146,87 +114,23 @@ func GetOut() (string, error) {
// GetModels returns the list of models to download
func GetModels() []string {
if flag.NArg() == 0 {
fmt.Println("No model specified.")
fmt.Println("Preparing to download all models...")
// Calculate total download size
fmt.Println("Calculating total download size...")
totalSize, err := CalculateTotalDownloadSize(modelNames)
if err != nil {
fmt.Println("Error calculating download sizes:", err)
os.Exit(1)
}
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
fmt.Println("Would you like to download all models? (y/N)")
// Prompt for user input
var response string
fmt.Scanln(&response)
if response != "y" && response != "Y" {
fmt.Println("Aborting. Specify a model to download.")
os.Exit(0)
}
return modelNames // Return all models if confirmed
return modelNames
} else {
return flag.Args()
}
return flag.Args() // Return specific models if arguments are provided
}
func CalculateTotalDownloadSize(models []string) (int64, error) {
var totalSize int64
client := http.Client{}
for _, model := range models {
modelURL, err := URLForModel(model)
if err != nil {
return 0, err
}
// Issue a HEAD request to get the file size
req, err := http.NewRequest("HEAD", modelURL, nil)
if err != nil {
return 0, err
}
resp, err := client.Do(req)
if err != nil {
return 0, err
}
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
continue
}
size := resp.ContentLength
totalSize += size
}
return totalSize, nil
}
// URLForModel returns the URL for the given model on huggingface.co
func URLForModel(model string) (string, error) {
// Ensure "ggml-" prefix is added only once
if !strings.HasPrefix(model, "ggml-") {
model = "ggml-" + model
}
// Ensure ".bin" extension is added only once
if filepath.Ext(model) != srcExt {
model += srcExt
}
// Parse the base URL
url, err := url.Parse(srcUrl)
if err != nil {
return "", err
} else {
url.Path = filepath.Join(url.Path, model)
}
// Ensure no trailing slash in the base URL
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
return url.String(), nil
}

View File

@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
// Process the data
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
context.ResetTimings()
if err := context.Process(data, nil, cb, nil); err != nil {
if err := context.Process(data, cb, nil); err != nil {
return err
}

View File

@ -71,10 +71,6 @@ func (context *context) Language() string {
return whisper.Whisper_lang_str(context.params.Language())
}
func (context *context) DetectedLanguage() string {
return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id())
}
// Set translate flag
func (context *context) SetTranslate(v bool) {
context.params.SetTranslate(v)
@ -193,7 +189,6 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
// Process new sample data and return any errors
func (context *context) Process(
data []float32,
callEncoderBegin EncoderBeginCallback,
callNewSegment SegmentCallback,
callProgress ProgressCallback,
) error {
@ -208,20 +203,7 @@ func (context *context) Process(
// We don't do parallel processing at the moment
processors := 0
if processors > 1 {
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin,
func(new int) {
if callNewSegment != nil {
num_segments := context.model.ctx.Whisper_full_n_segments()
s0 := num_segments - new
for i := s0; i < num_segments; i++ {
callNewSegment(toSegment(context.model.ctx, i))
}
}
}); err != nil {
return err
}
} else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin,
func(new int) {
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
if callNewSegment != nil {
num_segments := context.model.ctx.Whisper_full_n_segments()
s0 := num_segments - new
@ -229,11 +211,22 @@ func (context *context) Process(
callNewSegment(toSegment(context.model.ctx, i))
}
}
}, func(progress int) {
if callProgress != nil {
callProgress(progress)
}
}); err != nil {
return err
}
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
if callNewSegment != nil {
num_segments := context.model.ctx.Whisper_full_n_segments()
s0 := num_segments - new
for i := s0; i < num_segments; i++ {
callNewSegment(toSegment(context.model.ctx, i))
}
}
}, func(progress int) {
if callProgress != nil {
callProgress(progress)
}
}); err != nil {
return err
}

View File

@ -88,37 +88,6 @@ func TestProcess(t *testing.T) {
context, err := model.NewContext()
assert.NoError(err)
err = context.Process(data, nil, nil, nil)
err = context.Process(data, nil, nil)
assert.NoError(err)
}
func TestDetectedLanguage(t *testing.T) {
assert := assert.New(t)
fh, err := os.Open(SamplePath)
assert.NoError(err)
defer fh.Close()
// Decode the WAV file - load the full buffer
dec := wav.NewDecoder(fh)
buf, err := dec.FullPCMBuffer()
assert.NoError(err)
assert.Equal(uint16(1), dec.NumChans)
data := buf.AsFloat32Buffer().Data
model, err := whisper.New(ModelPath)
assert.NoError(err)
assert.NotNil(model)
defer model.Close()
context, err := model.NewContext()
assert.NoError(err)
err = context.Process(data, nil, nil, nil)
assert.NoError(err)
expectedLanguage := "en"
actualLanguage := context.DetectedLanguage()
assert.Equal(expectedLanguage, actualLanguage)
}

View File

@ -16,10 +16,6 @@ type SegmentCallback func(Segment)
// processing. It is called during the Process function
type ProgressCallback func(int)
// EncoderBeginCallback is the callback function for checking if we want to
// continue processing. It is called during the Process function
type EncoderBeginCallback func() bool
// Model is the interface to a whisper model. Create a new model with the
// function whisper.New(string)
type Model interface {
@ -35,13 +31,12 @@ type Model interface {
Languages() []string
}
// Context is the speech recognition context.
// Context is the speach recognition context.
type Context interface {
SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
SetTranslate(bool) // Set translate flag
IsMultilingual() bool // Return true if the model is multilingual.
Language() string // Get language
DetectedLanguage() string // Get detected language
SetOffset(time.Duration) // Set offset
SetDuration(time.Duration) // Set duration
@ -63,7 +58,7 @@ type Context interface {
// Process mono audio data and return any errors.
// If defined, newly generated segments are passed to the
// callback function during processing.
Process([]float32, EncoderBeginCallback, SegmentCallback, ProgressCallback) error
Process([]float32, SegmentCallback, ProgressCallback) error
// After process is called, return segments until the end of the stream
// is reached, when io.EOF is returned.

View File

@ -9,7 +9,7 @@ import (
// CGO
/*
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++ -fopenmp
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
#include <whisper.h>
#include <stdlib.h>

View File

@ -25,13 +25,13 @@ sourceSets {
}
tasks.register('copyLibwhisperDynlib', Copy) {
from '../../build/src'
include 'libwhisper.dylib'
from '../../build'
include 'libwhisper.dynlib'
into 'build/generated/resources/main/darwin'
}
tasks.register('copyLibwhisperSo', Copy) {
from '../../build/src'
from '../../build'
include 'libwhisper.so'
into 'build/generated/resources/main/linux-x86-64'
}
@ -55,12 +55,7 @@ java {
withJavadocJar()
}
sourcesJar() {
dependsOn copyLibs
}
jar {
dependsOn copyLibs
exclude '**/whisper_java.exp', '**/whisper_java.lib'
}
@ -72,9 +67,6 @@ tasks.withType(Test) {
useJUnitPlatform()
}
test.dependsOn copyLibs
processResources.dependsOn copyLibs
dependencies {
implementation "net.java.dev.jna:jna:5.13.0"
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"

0
bindings/java/gradlew vendored Executable file → Normal file
View File

View File

@ -1,24 +0,0 @@
package io.github.ggerganov.whispercpp;
/**
* Presets for alignment heads in DTW token timestamps
*/
public class WhisperConstants {
// Alignment heads presets
public static final int WHISPER_AHEADS_NONE = 0;
public static final int WHISPER_AHEADS_TINY_EN = 1;
public static final int WHISPER_AHEADS_TINY = 2;
public static final int WHISPER_AHEADS_BASE_EN = 3;
public static final int WHISPER_AHEADS_BASE = 4;
public static final int WHISPER_AHEADS_SMALL_EN = 5;
public static final int WHISPER_AHEADS_SMALL = 6;
public static final int WHISPER_AHEADS_MEDIUM_EN = 7;
public static final int WHISPER_AHEADS_MEDIUM = 8;
public static final int WHISPER_AHEADS_LARGE_V1 = 9;
public static final int WHISPER_AHEADS_LARGE_V2 = 10;
public static final int WHISPER_AHEADS_LARGE_V3 = 11;
public static final int WHISPER_AHEADS_LARGE_V3_TURBO = 12;
public static final int WHISPER_AHEADS_CUSTOM = 13;
public static final int WHISPER_AHEADS_N_TOP_MOST = 14;
public static final int WHISPER_AHEADS_COUNT = 15;
}

View File

@ -1,9 +1,7 @@
package io.github.ggerganov.whispercpp;
import com.sun.jna.NativeLong;
import com.sun.jna.Structure;
import com.sun.jna.ptr.PointerByReference;
import com.sun.jna.Pointer;
import io.github.ggerganov.whispercpp.ggml.GgmlType;
import io.github.ggerganov.whispercpp.WhisperModel;
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
@ -11,26 +9,33 @@ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
import java.util.List;
public class WhisperContext extends Structure {
public NativeLong t_load_us;
public NativeLong t_start_us;
int t_load_us = 0;
int t_start_us = 0;
/** weight type (FP32 / FP16 / QX) */
public GgmlType wtype = GgmlType.GGML_TYPE_F16;
GgmlType wtype = GgmlType.GGML_TYPE_F16;
/** intermediate type (FP32 or FP16) */
public GgmlType itype = GgmlType.GGML_TYPE_F16;
GgmlType itype = GgmlType.GGML_TYPE_F16;
public WhisperContextParams.ByValue params;
public Pointer model;
public Pointer vocab;
public Pointer state;
// WhisperModel model;
public PointerByReference model;
// whisper_vocab vocab;
// whisper_state * state = nullptr;
public PointerByReference vocab;
public PointerByReference state;
/** populated by whisper_init_from_file_with_params() */
public Pointer path_model;
String path_model;
WhisperContextParams params;
@Override
protected List<String> getFieldOrder() {
return List.of("t_load_us", "t_start_us", "wtype", "itype",
"params", "model", "vocab", "state", "path_model");
}
// public static class ByReference extends WhisperContext implements Structure.ByReference {
// }
//
// public static class ByValue extends WhisperContext implements Structure.ByValue {
// }
//
// @Override
// protected List<String> getFieldOrder() {
// return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
// }
}

View File

@ -43,11 +43,11 @@ public class WhisperCpp implements AutoCloseable {
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
* @param params - params to use when initialising the context
*/
public void initContext(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
initContextImpl(modelPath, params);
}
private void initContextImpl(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
if (ctx != null) {
lib.whisper_free(ctx);
}
@ -69,13 +69,15 @@ public class WhisperCpp implements AutoCloseable {
/**
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
* Returns a ByValue instance to ensure proper parameter passing to native code.
* Because this function allocates memory for the params, the caller must call either:
* - call `whisper_free_context_params()`
* - `Native.free(Pointer.nativeValue(pointer));`
*/
public WhisperContextParams.ByValue getContextDefaultParams() {
WhisperContextParams.ByValue valueParams = new WhisperContextParams.ByValue(
lib.whisper_context_default_params_by_ref());
valueParams.read();
return valueParams;
public WhisperContextParams getContextDefaultParams() {
paramsPointer = lib.whisper_context_default_params_by_ref();
WhisperContextParams params = new WhisperContextParams(paramsPointer);
params.read();
return params;
}
/**
@ -86,7 +88,7 @@ public class WhisperCpp implements AutoCloseable {
*
* @param strategy - GREEDY
*/
public WhisperFullParams.ByValue getFullDefaultParams(WhisperSamplingStrategy strategy) {
public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
Pointer pointer;
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
@ -102,7 +104,7 @@ public class WhisperCpp implements AutoCloseable {
pointer = beamParamsPointer;
}
WhisperFullParams.ByValue params = new WhisperFullParams.ByValue(pointer);
WhisperFullParams params = new WhisperFullParams(pointer);
params.read();
return params;
}
@ -136,21 +138,15 @@ public class WhisperCpp implements AutoCloseable {
}
/**
* Run the entire model: PCM -&gt; log mel spectrogram -&gt; encoder -&gt; decoder -&gt; text.
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
* Not thread safe for same context
* Uses the specified decoding strategy to obtain the text.
*/
public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
if (ctx == null) {
throw new IllegalStateException("Model not initialised");
}
/*
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
valueParams.read();
*/
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
throw new IOException("Failed to process audio");
}
@ -167,17 +163,12 @@ public class WhisperCpp implements AutoCloseable {
return str.toString().trim();
}
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
if (ctx == null) {
throw new IllegalStateException("Model not initialised");
}
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
valueParams.read();
if (lib.whisper_full(ctx, valueParams, audioData, audioData.length) != 0) {
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
throw new IOException("Failed to process audio");
}

View File

@ -38,7 +38,7 @@ public interface WhisperCppJnaLibrary extends Library {
* @param params Pointer to whisper_context_params
* @return Whisper context on success, null on failure
*/
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams.ByValue params);
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
/**
* Allocate (almost) all memory needed for the model by loading from a buffer.
@ -180,12 +180,12 @@ public interface WhisperCppJnaLibrary extends Library {
/**
* @return the id of the specified language, returns -1 if not found.
* Examples:
* "de" -&gt; 2
* "german" -&gt; 2
* "de" -> 2
* "german" -> 2
*/
int whisper_lang_id(String lang);
/** @return the short string of the specified language id (e.g. 2 -&gt; "de"), returns nullptr if not found */
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
String whisper_lang_str(int id);
/**
@ -268,21 +268,20 @@ public interface WhisperCppJnaLibrary extends Library {
void whisper_free_params(Pointer params);
/**
* Run the entire model: PCM -&gt; log mel spectrogram -&gt; encoder -&gt; decoder -&gt; text
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
* Not thread safe for same context
* Uses the specified decoding strategy to obtain the text.
*/
int whisper_full(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples);
int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
public int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams.ByValue params, float[] samples, int n_samples);
//int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
// Result is stored in the default state of the context
// Not thread safe if executed in parallel on the same context.
// It seems this approach can offer some speedup in some cases.
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
int whisper_full_parallel(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples, int n_processors);
int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
/**
* Number of generated text segments.

View File

@ -1,17 +0,0 @@
package io.github.ggerganov.whispercpp.callbacks;
import com.sun.jna.Callback;
/**
* Callback for aborting GGML computation
* Maps to the C typedef: bool (*ggml_abort_callback)(void * data)
*/
public interface GgmlAbortCallback extends Callback {
/**
* Return true to abort the computation, false to continue
*
* @param data User data passed to the callback
* @return true to abort, false to continue
*/
boolean invoke(com.sun.jna.Pointer data);
}

View File

@ -1,30 +0,0 @@
package io.github.ggerganov.whispercpp.params;
import com.sun.jna.*;
import java.util.Arrays;
import java.util.List;
public class WhisperAhead extends Structure {
public int n_text_layer;
public int n_head;
public WhisperAhead() {
super();
}
public WhisperAhead(int textLayer, int head) {
super();
this.n_text_layer = textLayer;
this.n_head = head;
}
@Override
protected List<String> getFieldOrder() {
return Arrays.asList("n_text_layer", "n_head");
}
public static class ByReference extends WhisperAhead implements Structure.ByReference {}
public static class ByValue extends WhisperAhead implements Structure.ByValue {}
}

View File

@ -1,41 +0,0 @@
package io.github.ggerganov.whispercpp.params;
import com.sun.jna.*;
import java.util.Arrays;
import java.util.List;
public class WhisperAheads extends Structure {
public NativeLong n_heads;
public Pointer heads;
public WhisperAheads() {
super();
}
/**
* Create alignment heads from an array of WhisperAhead objects
*/
public void setHeads(WhisperAhead[] aheadsArray) {
this.n_heads = new NativeLong(aheadsArray.length);
int structSize = aheadsArray[0].size();
Memory mem = new Memory(structSize * aheadsArray.length);
for (int i = 0; i < aheadsArray.length; i++) {
aheadsArray[i].write();
byte[] buffer = aheadsArray[i].getPointer().getByteArray(0, structSize);
mem.write(i * structSize, buffer, 0, buffer.length);
}
this.heads = mem;
}
@Override
protected List<String> getFieldOrder() {
return Arrays.asList("n_heads", "heads");
}
public static class ByReference extends WhisperAheads implements Structure.ByReference {}
public static class ByValue extends WhisperAheads implements Structure.ByValue {}
}

View File

@ -1,5 +1,7 @@
package io.github.ggerganov.whispercpp.params;
import com.sun.jna.*;
import java.util.Arrays;
import java.util.List;
@ -9,73 +11,21 @@ import java.util.List;
* whisper_context_default_params()
*/
public class WhisperContextParams extends Structure {
public WhisperContextParams(Pointer p) {
super(p);
}
public WhisperContextParams() {
super();
}
/** Use GPU for inference (default = true) */
/** Use GPU for inference Number (default = true) */
public CBool use_gpu;
/** Use flash attention (default = false) */
public CBool flash_attn;
/** CUDA device to use (default = 0) */
public int gpu_device;
/** [EXPERIMENTAL] Enable token-level timestamps with DTW (default = false) */
public CBool dtw_token_timestamps;
/** [EXPERIMENTAL] Alignment heads preset for DTW */
public int dtw_aheads_preset;
/** Number of top layers to use for DTW when using WHISPER_AHEADS_N_TOP_MOST preset */
public int dtw_n_top;
public WhisperAheads.ByValue dtw_aheads;
/** DTW memory size (internal use) */
public NativeLong dtw_mem_size;
/** Use GPU for inference */
/** Use GPU for inference Number (default = true) */
public void useGpu(boolean enable) {
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
}
/** Use flash attention */
public void useFlashAttn(boolean enable) {
flash_attn = enable ? CBool.TRUE : CBool.FALSE;
}
/** Enable DTW token-level timestamps */
public void enableDtwTokenTimestamps(boolean enable) {
dtw_token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
}
/** Set DTW alignment heads preset */
public void setDtwAheadsPreset(int preset) {
dtw_aheads_preset = preset;
}
@Override
protected List<String> getFieldOrder() {
return Arrays.asList(
"use_gpu",
"flash_attn",
"gpu_device",
"dtw_token_timestamps",
"dtw_aheads_preset",
"dtw_n_top",
"dtw_aheads",
"dtw_mem_size"
);
}
public static class ByValue extends WhisperContextParams implements Structure.ByValue {
public ByValue() { super(); }
public ByValue(Pointer p) { super(p); }
return Arrays.asList("use_gpu");
}
}

View File

@ -5,7 +5,6 @@ import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
import io.github.ggerganov.whispercpp.callbacks.GgmlAbortCallback;
import java.util.Arrays;
import java.util.List;
@ -17,12 +16,10 @@ import java.util.List;
*/
public class WhisperFullParams extends Structure {
public WhisperFullParams() {
super();
}
public WhisperFullParams(Pointer p) {
super(p);
// super(p, ALIGN_MSVC);
// super(p, ALIGN_GNUC);
}
/** Sampling strategy for whisper_full() function. */
@ -72,10 +69,10 @@ public class WhisperFullParams extends Structure {
single_segment = single ? CBool.TRUE : CBool.FALSE;
}
/** Flag to print special tokens (e.g., &lt;SOT&gt;, &lt;EOT&gt;, &lt;BEG&gt;, etc.). (default = false) */
/** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). (default = false) */
public CBool print_special;
/** Flag to print special tokens (e.g., &lt;SOT&gt;, &lt;EOT&gt;, &lt;BEG&gt;, etc.). (default = false) */
/** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). (default = false) */
public void printSpecial(boolean enable) {
print_special = enable ? CBool.TRUE : CBool.FALSE;
}
@ -132,14 +129,6 @@ public class WhisperFullParams extends Structure {
/** Maximum tokens per segment (0, default = no limit) */
public int max_tokens;
/** [EXPERIMENTAL] Enable debug mode for extra info */
public CBool debug_mode;
/** Enable debug mode */
public void enableDebugMode(boolean enable) {
debug_mode = enable ? CBool.TRUE : CBool.FALSE;
}
/** Overwrite the audio context size (0 = use default). */
public int audio_ctx;
@ -285,16 +274,6 @@ public class WhisperFullParams extends Structure {
*/
public Pointer encoder_begin_callback_user_data;
/** Callback used to abort GGML computation */
public Pointer abort_callback;
/** User data for the abort_callback */
public Pointer abort_callback_user_data;
public void setAbortCallback(GgmlAbortCallback callback) {
abort_callback = CallbackReference.getFunctionPointer(callback);
}
/**
* Callback by each decoder to filter obtained logits.
* WhisperLogitsFilterCallback
@ -331,28 +310,17 @@ public class WhisperFullParams extends Structure {
@Override
protected List<String> getFieldOrder() {
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
"offset_ms", "duration_ms", "translate", "no_context",
"no_timestamps", "single_segment", "print_special",
"print_progress", "print_realtime", "print_timestamps",
"token_timestamps", "thold_pt", "thold_ptsum", "max_len",
"split_on_word", "max_tokens", "debug_mode", "audio_ctx",
"tdrz_enable", "suppress_regex", "initial_prompt",
"prompt_tokens", "prompt_n_tokens", "language", "detect_language",
"suppress_blank", "suppress_nst", "temperature",
"max_initial_ts", "length_penalty", "temperature_inc",
"entropy_thold", "logprob_thold", "no_speech_thold", "greedy",
"beam_search", "new_segment_callback", "new_segment_callback_user_data",
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
"no_context", "single_segment", "no_timestamps",
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
"tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
"suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
"new_segment_callback", "new_segment_callback_user_data",
"progress_callback", "progress_callback_user_data",
"encoder_begin_callback", "encoder_begin_callback_user_data",
"abort_callback", "abort_callback_user_data",
"logits_filter_callback", "logits_filter_callback_user_data",
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
}
public static class ByValue extends WhisperFullParams implements Structure.ByValue {
public ByValue() { super(); }
public ByValue(Pointer p) { super(p); }
}
}

View File

@ -76,7 +76,7 @@ class WhisperCppTest {
float[] floats = new float[b.length / 2];
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
params.print_progress = CBool.FALSE;
//params.initial_prompt = "and so my fellow Americans um, like";

View File

@ -33,9 +33,6 @@ mkdir build-em && cd build-em
emcmake cmake .. && make -j
# run test
node ../tests/test-whisper.js
# For Node.js versions prior to v16.4.0, experimental features need to be enabled:
node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
# publish npm package

View File

@ -1,7 +1,5 @@
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
ggml/src/ggml-cpu/ggml-cpu.cpp \
ggml/src/ggml-cpu/unary-ops.cpp \
ggml/src/ggml-cpu/binary-ops.cpp \
ggml/include/ggml-backend.h \
ggml/include/ggml.h \
ggml/include/ggml-alloc.h \

View File

@ -35,7 +35,7 @@ if $GGML_METAL
$GGML_METAL_EMBED_LIBRARY = true
end
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
$MK_CFLAGS = '-std=c11 -fPIC'
$MK_CXXFLAGS = '-std=c++17 -fPIC'
$MK_NVCCFLAGS = '-std=c++17'
@ -168,14 +168,10 @@ $OBJ_GGML <<
'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
'ggml/src/ggml-cpu/ggml-cpu-traits.o' <<
'ggml/src/ggml-cpu/unary-ops.o' <<
'ggml/src/ggml-cpu/binary-ops.o'
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
$OBJ_WHISPER <<
'src/whisper.o' <<
'examples/common.o' <<
'examples/common-whisper.o'
'src/whisper.o'
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
$objs <<

View File

@ -1,6 +1,7 @@
#include <ruby.h>
#include "ruby_whisper.h"
#include "common-whisper.h"
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include <string>
#include <vector>
@ -46,9 +47,84 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
return self;
// WAV input - this is directly from main.cpp example
{
drwav wav;
std::vector<uint8_t> wav_data; // used for pipe input from stdin
if (fname_inp == "-") {
{
uint8_t buf[1024];
while (true) {
const size_t n = fread(buf, 1, sizeof(buf), stdin);
if (n == 0) {
break;
}
wav_data.insert(wav_data.end(), buf, buf + n);
}
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to open WAV file from stdin\n");
return self;
}
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
return self;
}
if (wav.channels != 1 && wav.channels != 2) {
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
return self;
}
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
return self;
}
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
return self;
}
if (wav.bitsPerSample != 16) {
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
return self;
}
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
std::vector<int16_t> pcm16;
pcm16.resize(n*wav.channels);
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
drwav_uninit(&wav);
// convert to mono, float
pcmf32.resize(n);
if (wav.channels == 1) {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[i])/32768.0f;
}
} else {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
}
}
if (rwp->diarize) {
// convert to stereo, float
pcmf32s.resize(2);
pcmf32s[0].resize(n);
pcmf32s[1].resize(n);
for (uint64_t i = 0; i < n; i++) {
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
}
}
}
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race

View File

@ -25,7 +25,7 @@ class TestCallback < TestBase
assert start_time >= 0
assert_kind_of Integer, end_time
assert end_time > 0
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text) if i_segment == 0
assert_match /ask not what your country can do for you, ask what you can do for your country/, text if i_segment == 0
end
}
@ -145,9 +145,9 @@ class TestCallback < TestBase
def test_abort_on
do_abort = false
_aborted_from_callback = false
aborted_from_callback = false
@params.on_new_segment do |segment|
do_abort = true if segment.text.match?(/ask/)
do_abort = true if segment.text.match? /ask/
end
i = 0
@params.abort_on do

View File

@ -4,7 +4,7 @@ class TestError < TestBase
def test_error
error = Whisper::Error.new(-2)
assert_equal "failed to compute log mel spectrogram", error.message
assert_equal(-2, error.code)
assert_equal -2, error.code
end
def test_unknown_error
@ -14,7 +14,7 @@ class TestError < TestBase
def test_non_int_code
assert_raise TypeError do
_error = Whisper::Error.new("non int")
error = Whisper::Error.new("non int")
end
end
end

View File

@ -162,7 +162,7 @@ class TestParams < TestBase
end
def test_length_penalty
assert_equal(-1.0, @params.length_penalty)
assert_equal -1.0, @params.length_penalty
@params.length_penalty = 0.5
assert_equal 0.5, @params.length_penalty
end
@ -180,9 +180,9 @@ class TestParams < TestBase
end
def test_logprob_thold
assert_in_delta(-1.0, @params.logprob_thold)
assert_in_delta -1.0, @params.logprob_thold
@params.logprob_thold = -0.5
assert_in_delta(-0.5, @params.logprob_thold)
assert_in_delta -0.5, @params.logprob_thold
end
def test_no_speech_thold

View File

@ -49,13 +49,13 @@ class TestSegment < TestBase
if index == 0
seg = segment
assert_equal 0, segment.start_time
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
end
index += 1
end
whisper.transcribe(AUDIO, params)
assert_equal 0, seg.start_time
assert_match(/ask not what your country can do for you, ask what you can do for your country/, seg.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
end
def test_on_new_segment_twice

View File

@ -16,7 +16,7 @@ class TestWhisper < TestBase
params.print_timestamps = false
@whisper.transcribe(AUDIO, params) {|text|
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
}
end
@ -32,7 +32,7 @@ class TestWhisper < TestBase
def test_full_get_segment
segment = whisper.full_get_segment(0)
assert_equal 0, segment.start_time
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
end
def test_full_get_segment_t0
@ -59,7 +59,7 @@ class TestWhisper < TestBase
end
def test_full_get_segment_text
assert_match(/ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0))
assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
end
def test_full_get_segment_no_speech_prob
@ -134,14 +134,14 @@ class TestWhisper < TestBase
@whisper.full(@params, @samples, @samples.length)
assert_equal 1, @whisper.full_n_segments
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_without_length
@whisper.full(@params, @samples)
assert_equal 1, @whisper.full_n_segments
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_enumerator
@ -149,7 +149,7 @@ class TestWhisper < TestBase
@whisper.full(@params, samples, @samples.length)
assert_equal 1, @whisper.full_n_segments
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_enumerator_without_length
@ -171,28 +171,26 @@ class TestWhisper < TestBase
@whisper.full(@params, samples)
assert_equal 1, @whisper.full_n_segments
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_parallel
nprocessors = 2
@whisper.full_parallel(@params, @samples, @samples.length, nprocessors)
@whisper.full_parallel(@params, @samples, @samples.length, Etc.nprocessors)
assert_equal nprocessors, @whisper.full_n_segments
assert_equal Etc.nprocessors, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match(/ask what you can do/i, text)
assert_match(/for your country/i, text)
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_with_memory_view
nprocessors = 2
samples = JFKReader.new(AUDIO)
@whisper.full_parallel(@params, samples, nil, nprocessors)
@whisper.full_parallel(@params, samples, nil, Etc.nprocessors)
assert_equal nprocessors, @whisper.full_n_segments
assert_equal Etc.nprocessors, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match(/ask what you can do/i, text)
assert_match(/for your country/i, text)
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_without_length_and_n_processors
@ -200,18 +198,17 @@ class TestWhisper < TestBase
assert_equal 1, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match(/ask what you can do/i, text)
assert_match(/for your country/i, text)
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_without_length
nprocessors = 2
@whisper.full_parallel(@params, @samples, nil, nprocessors)
@whisper.full_parallel(@params, @samples, nil, Etc.nprocessors)
assert_equal nprocessors, @whisper.full_n_segments
assert_equal Etc.nprocessors, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match(/ask what you can do/i, text)
assert_match(/for your country/i, text)
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_without_n_processors
@ -219,8 +216,8 @@ class TestWhisper < TestBase
assert_equal 1, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match(/ask what you can do/i, text)
assert_match(/for your country/i, text)
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
end
end

View File

@ -1,535 +0,0 @@
#!/bin/bash
#
# Options
IOS_MIN_OS_VERSION=16.4
MACOS_MIN_OS_VERSION=13.3
VISIONOS_MIN_OS_VERSION=1.0
TVOS_MIN_OS_VERSION=16.4
BUILD_SHARED_LIBS=OFF
WHISPER_BUILD_EXAMPLES=OFF
WHISPER_BUILD_TESTS=OFF
WHISPER_BUILD_SERVER=OFF
GGML_METAL=ON
GGML_METAL_EMBED_LIBRARY=ON
GGML_BLAS_DEFAULT=ON
GGML_METAL_USE_BF16=ON
GGML_OPENMP=OFF
COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
# Common options for all builds
COMMON_CMAKE_ARGS=(
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY=""
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO
-DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym"
-DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES
-DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO
-DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
-DWHISPER_BUILD_EXAMPLES=${WHISPER_BUILD_EXAMPLES}
-DWHISPER_BUILD_TESTS=${WHISPER_BUILD_TESTS}
-DWHISPER_BUILD_SERVER=${WHISPER_BUILD_SERVER}
-DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY}
-DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT}
-DGGML_METAL=${GGML_METAL}
-DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16}
-DGGML_NATIVE=OFF
-DGGML_OPENMP=${GGML_OPENMP}
)
check_required_tool() {
local tool=$1
local install_message=$2
if ! command -v $tool &> /dev/null; then
echo "Error: $tool is required but not found."
echo "$install_message"
exit 1
fi
}
echo "Checking for required tools..."
check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
set -e
## Clean up previous builds
rm -rf build-apple
rm -rf build-ios-sim
rm -rf build-ios-device
rm -rf build-macos
rm -rf build-visionos
rm -rf build-visionos-sim
rm -rf build-tvos-sim
rm -rf build-tvos-device
# Setup the xcframework build directory structure
setup_framework_structure() {
local build_dir=$1
local min_os_version=$2
local platform=$3 # "ios", "macos", "visionos", or "tvos"
local framework_name="whisper"
echo "Creating ${platform}-style framework structure for ${build_dir}"
if [[ "$platform" == "macos" ]]; then
# macOS versioned structure uses versioned directories
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources
# Create symbolic links
ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current
ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers
ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules
ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources
ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name}
# Set header and module paths
local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/
local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/
else
# iOS/VisionOS/tvOS use a flat structure
mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers
mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules
# Remove any existing structure to ensure clean build
rm -rf ${build_dir}/framework/${framework_name}.framework/Versions
# Set header and module paths
local header_path=${build_dir}/framework/${framework_name}.framework/Headers/
local module_path=${build_dir}/framework/${framework_name}.framework/Modules/
fi
# Copy all required headers (common for all platforms)
cp include/whisper.h ${header_path}
cp ggml/include/ggml.h ${header_path}
cp ggml/include/ggml-alloc.h ${header_path}
cp ggml/include/ggml-backend.h ${header_path}
cp ggml/include/ggml-metal.h ${header_path}
cp ggml/include/ggml-cpu.h ${header_path}
cp ggml/include/ggml-blas.h ${header_path}
cp ggml/include/gguf.h ${header_path}
# Create module map (common for all platforms)
cat > ${module_path}module.modulemap << EOF
framework module whisper {
header "whisper.h"
header "ggml.h"
header "ggml-alloc.h"
header "ggml-backend.h"
header "ggml-metal.h"
header "ggml-cpu.h"
header "ggml-blas.h"
header "gguf.h"
link "c++"
link framework "Accelerate"
link framework "Metal"
link framework "Foundation"
export *
}
EOF
# Platform-specific settings for Info.plist
local platform_name=""
local sdk_name=""
local supported_platform=""
case "$platform" in
"ios")
platform_name="iphoneos"
sdk_name="iphoneos${min_os_version}"
supported_platform="iPhoneOS"
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
local device_family=' <key>UIDeviceFamily</key>
<array>
<integer>1</integer>
<integer>2</integer>
</array>'
;;
"macos")
platform_name="macosx"
sdk_name="macosx${min_os_version}"
supported_platform="MacOSX"
local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist"
local device_family=""
;;
"visionos")
platform_name="xros"
sdk_name="xros${min_os_version}"
supported_platform="XRPlatform"
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
local device_family=""
;;
"tvos")
platform_name="appletvos"
sdk_name="appletvos${min_os_version}"
supported_platform="AppleTVOS"
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
local device_family=' <key>UIDeviceFamily</key>
<array>
<integer>3</integer>
</array>'
;;
esac
# Create Info.plist
cat > ${plist_path} << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>whisper</string>
<key>CFBundleIdentifier</key>
<string>org.ggml.whisper</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>whisper</string>
<key>CFBundlePackageType</key>
<string>FMWK</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>MinimumOSVersion</key>
<string>${min_os_version}</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>${supported_platform}</string>
</array>${device_family}
<key>DTPlatformName</key>
<string>${platform_name}</string>
<key>DTSDKName</key>
<string>${sdk_name}</string>
</dict>
</plist>
EOF
}
# Create dynamic libraries from static libraries.
combine_static_libraries() {
local build_dir="$1"
local release_dir="$2"
local platform="$3" # "ios", "macos", "visionos", or "tvos"
local is_simulator="$4"
local base_dir="$(pwd)"
local framework_name="whisper"
# Determine output path based on platform
local output_lib=""
if [[ "$platform" == "macos" ]]; then
# macOS uses versioned structure
output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}"
else
# iOS, visionOS, and tvOS use a directory flat structure
output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}"
fi
local libs=(
"${base_dir}/${build_dir}/src/${release_dir}/libwhisper.a"
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
"${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
"${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
)
if [[ "$platform" == "macos" || "$platform" == "ios" ]]; then
echo "Adding libwhisper.coreml library to the build."
libs+=(
"${base_dir}/${build_dir}/src/${release_dir}/libwhisper.coreml.a"
)
fi
# Create temporary directory for processing
local temp_dir="${base_dir}/${build_dir}/temp"
echo "Creating temporary directory: ${temp_dir}"
mkdir -p "${temp_dir}"
# Since we have multiple architectures libtool will find object files that do not
# match the target architecture. We suppress these warnings.
libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null
# Determine SDK, architectures, and install_name based on platform and simulator flag.
local sdk=""
local archs=""
local min_version_flag=""
local install_name=""
local frameworks="-framework Foundation -framework Metal -framework Accelerate"
case "$platform" in
"ios")
if [[ "$is_simulator" == "true" ]]; then
sdk="iphonesimulator"
archs="arm64 x86_64"
min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}"
else
sdk="iphoneos"
archs="arm64"
min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}"
fi
install_name="@rpath/whisper.framework/whisper"
frameworks+=" -framework CoreML"
;;
"macos")
sdk="macosx"
archs="arm64 x86_64"
min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}"
install_name="@rpath/whisper.framework/Versions/Current/whisper"
frameworks+=" -framework CoreML"
;;
"visionos")
if [[ "$is_simulator" == "true" ]]; then
sdk="xrsimulator"
archs="arm64 x86_64"
min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator"
else
sdk="xros"
archs="arm64"
min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}"
fi
# Use flat structure for visionOS, same as iOS
install_name="@rpath/whisper.framework/whisper"
;;
"tvos")
if [[ "$is_simulator" == "true" ]]; then
sdk="appletvsimulator"
archs="arm64 x86_64"
min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}"
else
sdk="appletvos"
archs="arm64"
min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}"
fi
install_name="@rpath/whisper.framework/whisper"
;;
esac
# Build architecture flags
local arch_flags=""
for arch in $archs; do
arch_flags+=" -arch $arch"
done
# Create dynamic library
echo "Creating dynamic library for ${platform}."
xcrun -sdk $sdk clang++ -dynamiclib \
-isysroot $(xcrun --sdk $sdk --show-sdk-path) \
$arch_flags \
$min_version_flag \
-Wl,-force_load,"${temp_dir}/combined.a" \
$frameworks \
-install_name "$install_name" \
-o "${base_dir}/${output_lib}"
# Platform-specific post-processing for device builds
if [[ "$is_simulator" == "false" ]]; then
if command -v vtool &>/dev/null; then
case "$platform" in
"ios")
echo "Marking binary as a framework binary for iOS..."
vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
"visionos")
echo "Marking binary as a framework binary for visionOS..."
vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
"tvos")
echo "Marking binary as a framework binary for tvOS..."
vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
esac
else
echo "Warning: vtool not found. Binary may not pass App Store validation."
fi
fi
echo "Creating properly formatted dSYM..."
# Create a separate directory for dSYMs for all platforms
mkdir -p "${base_dir}/${build_dir}/dSYMs"
# iOS and visionOS style dSYM (flat structure)
if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
# Generate dSYM in the dSYMs directory
xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
# Create a copy of the binary that will be stripped
cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"
# Strip debug symbols from the copy
xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"
# Replace the original with the stripped version
mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
else
# macOS style dSYM
# First strip debug info to a separate file
xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"
# Generate dSYM in the dSYMs directory
xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
# Replace original binary with stripped version
mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
fi
# Remove any automatically generated dSYM files in the framework structure as they will
# otherwise case Invalid Bundle Structure validation errors.
if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
rm -rf "${base_dir}/${output_lib}.dSYM"
fi
# Clean up
rm -rf "${temp_dir}"
}
echo "Building for iOS simulator..."
cmake -B build-ios-sim -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
-DIOS=ON \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_SYSROOT=iphonesimulator \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-DWHISPER_COREML="ON" \
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
-S .
cmake --build build-ios-sim --config Release -- -quiet
echo "Building for iOS devices..."
cmake -B build-ios-device -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
-DCMAKE_OSX_SYSROOT=iphoneos \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-DWHISPER_COREML="ON" \
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
-S .
cmake --build build-ios-device --config Release -- -quiet
echo "Building for macOS..."
cmake -B build-macos -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-DWHISPER_COREML="ON" \
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
-S .
cmake --build build-macos --config Release -- -quiet
echo "Building for visionOS..."
cmake -B build-visionos -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xros \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-visionos --config Release -- -quiet
echo "Building for visionOS simulator..."
cmake -B build-visionos-sim -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xrsimulator \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-visionos-sim --config Release -- -quiet
# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS)
echo "Building for tvOS simulator..."
cmake -B build-tvos-sim -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_SYSROOT=appletvsimulator \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DGGML_METAL=ON \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-tvos-sim --config Release -- -quiet
echo "Building for tvOS devices..."
cmake -B build-tvos-device -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_SYSROOT=appletvos \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DGGML_METAL=ON \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-tvos-device --config Release -- -quiet
# Setup frameworks and copy binaries and headers
echo "Setting up framework structures..."
setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios"
setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios"
setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos"
setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos"
setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos"
setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos"
setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos"
# Create dynamic libraries from static libraries
echo "Creating dynamic libraries from static libraries..."
combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true"
combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false"
combine_static_libraries "build-macos" "Release" "macos" "false"
combine_static_libraries "build-visionos" "Release-xros" "visionos" "false"
combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true"
combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true"
combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
# Create XCFramework with correct debug symbols paths
echo "Creating XCFramework..."
xcodebuild -create-xcframework \
-framework $(pwd)/build-ios-sim/framework/whisper.framework \
-debug-symbols $(pwd)/build-ios-sim/dSYMs/whisper.dSYM \
-framework $(pwd)/build-ios-device/framework/whisper.framework \
-debug-symbols $(pwd)/build-ios-device/dSYMs/whisper.dSYM \
-framework $(pwd)/build-macos/framework/whisper.framework \
-debug-symbols $(pwd)/build-macos/dSYMS/whisper.dSYM \
-framework $(pwd)/build-visionos/framework/whisper.framework \
-debug-symbols $(pwd)/build-visionos/dSYMs/whisper.dSYM \
-framework $(pwd)/build-visionos-sim/framework/whisper.framework \
-debug-symbols $(pwd)/build-visionos-sim/dSYMs/whisper.dSYM \
-framework $(pwd)/build-tvos-device/framework/whisper.framework \
-debug-symbols $(pwd)/build-tvos-device/dSYMs/whisper.dSYM \
-framework $(pwd)/build-tvos-sim/framework/whisper.framework \
-debug-symbols $(pwd)/build-tvos-sim/dSYMs/whisper.dSYM \
-output $(pwd)/build-apple/whisper.xcframework

View File

@ -1,41 +0,0 @@
# CI
In addition to [Github Actions](https://github.com/ggerganov/whisper.cpp/actions) `whisper.cpp` uses a custom CI framework:
https://github.com/ggml-org/ci
It monitors the `master` branch for new commits and runs the
[ci/run.sh](https://github.com/ggerganov/whisper.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
to cover various hardware architectures, including GPU and Apple Silicon instances.
Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
Only the branches of this repo are monitored for this keyword.
It is a good practice, before publishing changes to execute the full CI locally on your machine:
```bash
mkdir tmp
# CPU-only build
bash ./ci/run.sh ./tmp/results ./tmp/mnt
# with CUDA support
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```
## Environment Variables
The CI script supports several environment variables to control the build:
| Variable | Description |
|----------|-------------|
| `GG_BUILD_CUDA` | Enable NVIDIA CUDA GPU acceleration |
| `GG_BUILD_SYCL` | Enable Intel SYCL acceleration |
| `GG_BUILD_VULKAN` | Enable Vulkan GPU acceleration |
| `GG_BUILD_METAL` | Enable Metal acceleration on Apple Silicon |
| `GG_BUILD_BLAS` | Enable BLAS CPU acceleration |
| `GG_BUILD_OPENVINO` | Enable OpenVINO support |
| `GG_BUILD_COREML` | Enable Core ML support for Apple Neural Engine |
| `GG_BUILD_LOW_PERF` | Limit tests for low-performance hardware |
| `GG_BUILD_TEST_MODELS` | Comma-separated list of models to test (e.g. "tiny.en,tiny,base,medium", defaults to all models unless `GG_BUILD_LOW_PERF` is set) |

336
ci/run.sh
View File

@ -1,336 +0,0 @@
#!/bin/bash
#
# sample usage:
#
# mkdir tmp
#
# # CPU-only build
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with CUDA support
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with SYCL support
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
if [ -z "$2" ]; then
echo "usage: $0 <output-dir> <mnt-dir>"
exit 1
fi
mkdir -p "$1"
mkdir -p "$2"
OUT=$(realpath "$1")
MNT=$(realpath "$2")
rm -f "$OUT/*.log"
rm -f "$OUT/*.exit"
rm -f "$OUT/*.md"
sd=`dirname $0`
cd $sd/../
SRC=`pwd`
ALL_MODELS=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
BENCH_N_THREADS=4
BENCH_ENCODER_ONLY=0
BENCH_FLASH_ATTN=0
# check for user-specified models first. if not specified, use fast models
if [ ! -z ${GG_BUILD_TEST_MODELS} ]; then
IFS=',' read -r -a MODELS <<< "${GG_BUILD_TEST_MODELS}"
else
if [ ! -z ${GG_BUILD_LOW_PERF} ]; then
MODELS=( "tiny" "base" "small" )
else
MODELS=("${ALL_MODELS[@]}")
fi
fi
CMAKE_EXTRA="-DWHISPER_FATAL_WARNINGS=ON"
if [ ! -z ${GG_BUILD_CUDA} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
fi
if [ ! -z ${GG_BUILD_SYCL} ]; then
if [ -z ${ONEAPI_ROOT} ]; then
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
echo "source /opt/intel/oneapi/setvars.sh"
exit 1
fi
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
fi
if [ ! -z ${GG_BUILD_OPENVINO} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_OPENVINO=ON"
fi
if [ ! -z ${GG_BUILD_METAL} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
fi
if [ ! -z ${GG_BUILD_VULKAN} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=ON"
fi
if [ ! -z ${GG_BUILD_BLAS} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_BLAS=ON"
fi
if [ ! -z ${GG_BUILD_COREML} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_COREML=ON"
fi
## helpers
# download a file if it does not exist or if it is outdated
function gg_wget {
local out=$1
local url=$2
local cwd=`pwd`
mkdir -p $out
cd $out
# should not re-download if file is the same
wget -nv -N $url
cd $cwd
}
function gg_download_model {
local model_name=$1
local model_file="$MNT/models/ggml-${model_name}.bin"
if [ ! -f ${model_file} ]; then
local cwd=`pwd`
mkdir -p "$MNT/models"
cd "$MNT/models"
bash "$cwd/models/download-ggml-model.sh" ${model_name} .
cd "$cwd"
fi
}
function gg_printf {
printf -- "$@" >> $OUT/README.md
}
# Helper function to check command exit status
function gg_check_last_command_status {
local exit_file=$1
local command_name=$2
local exit_status=$?
echo "$exit_status" > "$exit_file"
if [ $exit_status -ne 0 ]; then
echo "Error: Command $command_name failed with exit status $exit_status"
return 1
fi
return 0
}
# Usage: gg_run <test_name> [additional_args...]
#
# Parameters:
# test_name - Name of the test to run (calls gg_run_<test_name>)
# additional_args - Any additional arguments to pass to the test function (first argument is appended to the log filename)
function gg_run {
ci=$1
if [ $# -gt 1 ]; then
ci="${ci}_${2}"
fi
set -o pipefail
set -x
gg_run_$1 "$@" | tee $OUT/$ci.log
cur=$?
echo "$cur" > $OUT/$ci.exit
set +x
set +o pipefail
gg_sum_$1 "$@"
ret=$((ret | cur))
}
function gg_check_build_requirements {
if ! command -v cmake &> /dev/null; then
gg_printf 'cmake not found, please install'
fi
if ! command -v make &> /dev/null; then
gg_printf 'make not found, please install'
fi
}
## ci
function gg_run_ctest {
mode=$2
cd ${SRC}
rm -rf build-ci-${mode} && mkdir build-ci-${mode} && cd build-ci-${mode}
set -e
gg_check_build_requirements
(time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
}
function gg_sum_ctest {
mode=$2
gg_printf '### %s\n\n' "${ci}"
gg_printf 'Runs ctest in '${mode}' mode\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
gg_printf '```\n'
}
function gg_run_bench {
cd ${SRC}
# set flash attention flag if enabled
fattn=""
if [ "$BENCH_FLASH_ATTN" -eq 1 ]; then
fattn="-fa"
fi
# run memcpy benchmark if not encoder-only mode
if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
echo "Running memcpy benchmark"
(time ./build-ci-release/bin/whisper-bench -w 1 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-memcpy.log
gg_check_last_command_status "$OUT/${ci}-memcpy.exit" "memcpy benchmark"
echo "Running ggml_mul_mat benchmark with $BENCH_N_THREADS threads"
(time ./build-ci-release/bin/whisper-bench -w 2 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-mul_mat.log
gg_check_last_command_status "$OUT/${ci}-mul_mat.exit" "ggml_mul_mat benchmark"
fi
echo "Running benchmark for all models"
# generate header for the benchmark table
{
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "Config" "Model" "Th" "FA" "Enc." "Dec." "Bch5" "PP" "Commit"
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---"
} | tee -a $OUT/${ci}-models-table.log
# run benchmark for each model
for model in "${MODELS[@]}"; do
echo "Benchmarking model: $model"
# run the benchmark and capture output
output=$(./build-ci-release/bin/whisper-bench -m $MNT/models/ggml-$model.bin -t $BENCH_N_THREADS $fattn 2>&1)
ret=$?
# save the raw output
echo "$output" > $OUT/${ci}-bench-$model.log
if [ $ret -eq 0 ]; then
# parse the benchmark results
encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
system_info=$(echo "$output" | grep "system_info")
actual_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
# determine configuration
config=""
if [[ $system_info == *"AVX2 = 1"* ]]; then
config="$config AVX2"
fi
if [[ $system_info == *"NEON = 1"* ]]; then
config="$config NEON"
fi
if [[ $system_info == *"BLAS = 1"* ]]; then
config="$config BLAS"
fi
if [[ $system_info == *"COREML = 1"* ]]; then
config="$config COREML"
fi
if [[ $system_info == *"CUDA = 1"* ]]; then
config="$config CUDA"
fi
if [[ $system_info == *"METAL = 1"* ]]; then
config="$config METAL"
fi
# get commit hash
commit=$(git rev-parse --short HEAD)
# add row to benchmark table
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" \
"$config" "$model" "$actual_threads" "$BENCH_FLASH_ATTN" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit" \
| tee -a $OUT/${ci}-models-table.log
else
echo "Benchmark failed for model: $model" | tee -a $OUT/${ci}-bench-errors.log
fi
done
}
function gg_sum_bench {
gg_printf '### %s\n\n' "${ci}"
gg_printf 'Whisper Benchmark Results\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
# show memcpy and ggml_mul_mat benchmark results if available
if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
if [ -f "$OUT/${ci}-memcpy.log" ]; then
gg_printf '#### memcpy Benchmark\n\n'
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-memcpy.log)"
fi
if [ -f "$OUT/${ci}-mul_mat.log" ]; then
gg_printf '#### ggml_mul_mat Benchmark\n\n'
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-mul_mat.log)"
fi
fi
# show model benchmark results
gg_printf '#### Model Benchmarks\n\n'
if [ -f "$OUT/${ci}-models-table.log" ]; then
gg_printf '%s\n\n' "$(cat $OUT/${ci}-models-table.log)"
else
gg_printf 'No model benchmark results available.\n\n'
fi
# show any errors that occurred
if [ -f "$OUT/${ci}-bench-errors.log" ]; then
gg_printf '#### Benchmark Errors\n\n'
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-bench-errors.log)"
fi
}
ret=0
for model in "${MODELS[@]}"; do
test $ret -eq 0 && gg_download_model ${model}
done
if [ -z ${GG_BUILD_SYCL}]; then
test $ret -eq 0 && gg_run ctest debug
fi
test $ret -eq 0 && gg_run ctest release
test $ret -eq 0 && gg_run bench
exit $ret

View File

@ -14,6 +14,10 @@ if (WHISPER_SDL2)
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
endif()
if (WHISPER_CLBLAST)
find_package(CLBlast REQUIRED)
endif()
# common
set(TARGET common)
@ -52,8 +56,6 @@ add_library(${TARGET} STATIC
common.cpp
common-ggml.h
common-ggml.cpp
common-whisper.h
common-whisper.cpp
grammar-parser.h
grammar-parser.cpp
${COMMON_SOURCES_FFMPEG}
@ -61,7 +63,7 @@ add_library(${TARGET} STATIC
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS} ${CMAKE_DL_LIBS})
target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS})
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")

View File

@ -18,7 +18,6 @@ const whisperParamsMock = {
translate: true,
no_timestamps: false,
audio_ctx: 0,
max_len: 0,
};
describe("Run whisper.node", () => {

View File

@ -1,6 +1,5 @@
#include "napi.h"
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
@ -128,227 +127,192 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
void cb_log_disable(enum ggml_log_level, const char *, void *) {}
class ProgressWorker : public Napi::AsyncWorker {
public:
ProgressWorker(Napi::Function& callback, whisper_params params, Napi::Function progress_callback, Napi::Env env)
: Napi::AsyncWorker(callback), params(params), env(env) {
// Create thread-safe function
if (!progress_callback.IsEmpty()) {
tsfn = Napi::ThreadSafeFunction::New(
env,
progress_callback,
"Progress Callback",
0,
1
);
}
int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
if (params.no_prints) {
whisper_log_set(cb_log_disable, NULL);
}
~ProgressWorker() {
if (tsfn) {
// Make sure to release the thread-safe function on destruction
tsfn.Release();
}
if (params.fname_inp.empty() && params.pcmf32.empty()) {
fprintf(stderr, "error: no input files or audio buffer specified\n");
return 2;
}
void Execute() override {
// Use custom run function with progress callback support
run_with_progress(params, result);
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
exit(0);
}
void OnOK() override {
Napi::HandleScope scope(Env());
Napi::Object res = Napi::Array::New(Env(), result.size());
for (uint64_t i = 0; i < result.size(); ++i) {
Napi::Object tmp = Napi::Array::New(Env(), 3);
for (uint64_t j = 0; j < 3; ++j) {
tmp[j] = Napi::String::New(Env(), result[i][j]);
// whisper init
struct whisper_context_params cparams = whisper_context_default_params();
cparams.use_gpu = params.use_gpu;
cparams.flash_attn = params.flash_attn;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
if (ctx == nullptr) {
fprintf(stderr, "error: failed to initialize whisper context\n");
return 3;
}
// if params.pcmf32 is provided, set params.fname_inp to "buffer"
// this is simpler than further modifications in the code
if (!params.pcmf32.empty()) {
fprintf(stderr, "info: using audio buffer as input\n");
params.fname_inp.clear();
params.fname_inp.emplace_back("buffer");
}
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
const auto fname_inp = params.fname_inp[f];
const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
// read the input audio file if params.pcmf32 is not provided
if (params.pcmf32.empty()) {
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
continue;
}
} else {
pcmf32 = params.pcmf32;
}
// print system information
if (!params.no_prints) {
fprintf(stderr, "\n");
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
}
// print some info about the processing
if (!params.no_prints) {
fprintf(stderr, "\n");
if (!whisper_is_multilingual(ctx)) {
if (params.language != "en" || params.translate) {
params.language = "en";
params.translate = false;
fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
}
}
fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d, audio_ctx = %d ...\n",
__func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
params.n_threads, params.n_processors,
params.language.c_str(),
params.translate ? "translate" : "transcribe",
params.no_timestamps ? 0 : 1,
params.audio_ctx);
fprintf(stderr, "\n");
}
// run the inference
{
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
wparams.print_realtime = false;
wparams.print_progress = params.print_progress;
wparams.print_timestamps = !params.no_timestamps;
wparams.print_special = params.print_special;
wparams.translate = params.translate;
wparams.language = params.language.c_str();
wparams.n_threads = params.n_threads;
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
wparams.offset_ms = params.offset_t_ms;
wparams.duration_ms = params.duration_ms;
wparams.token_timestamps = params.output_wts || params.max_len > 0;
wparams.thold_pt = params.word_thold;
wparams.entropy_thold = params.entropy_thold;
wparams.logprob_thold = params.logprob_thold;
wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
wparams.audio_ctx = params.audio_ctx;
wparams.greedy.best_of = params.best_of;
wparams.beam_search.beam_size = params.beam_size;
wparams.initial_prompt = params.prompt.c_str();
wparams.no_timestamps = params.no_timestamps;
whisper_print_user_data user_data = { &params, &pcmf32s };
// this callback is called on each new segment
if (!wparams.print_realtime) {
wparams.new_segment_callback = whisper_print_segment_callback;
wparams.new_segment_callback_user_data = &user_data;
}
// example for abort mechanism
// in this example, we do not abort the processing, but we could if the flag is set to true
// the callback is called before every encoder run - if it returns false, the processing is aborted
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
bool is_aborted = *(bool*)user_data;
return !is_aborted;
};
wparams.encoder_begin_callback_user_data = &is_aborted;
}
if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
fprintf(stderr, "failed to process audio\n");
return 10;
}
res[i] = tmp;
}
Callback().Call({Env().Null(), res});
}
// Progress callback function - using thread-safe function
void OnProgress(int progress) {
if (tsfn) {
// Use thread-safe function to call JavaScript callback
auto callback = [progress](Napi::Env env, Napi::Function jsCallback) {
jsCallback.Call({Napi::Number::New(env, progress)});
};
tsfn.BlockingCall(callback);
}
const int n_segments = whisper_full_n_segments(ctx);
result.resize(n_segments);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
result[i].emplace_back(text);
}
whisper_print_timings(ctx);
whisper_free(ctx);
return 0;
}
class Worker : public Napi::AsyncWorker {
public:
Worker(Napi::Function& callback, whisper_params params)
: Napi::AsyncWorker(callback), params(params) {}
void Execute() override {
run(params, result);
}
void OnOK() override {
Napi::HandleScope scope(Env());
Napi::Object res = Napi::Array::New(Env(), result.size());
for (uint64_t i = 0; i < result.size(); ++i) {
Napi::Object tmp = Napi::Array::New(Env(), 3);
for (uint64_t j = 0; j < 3; ++j) {
tmp[j] = Napi::String::New(Env(), result[i][j]);
}
res[i] = tmp;
}
Callback().Call({Env().Null(), res});
}
private:
whisper_params params;
std::vector<std::vector<std::string>> result;
Napi::Env env;
Napi::ThreadSafeFunction tsfn;
// Custom run function with progress callback support
int run_with_progress(whisper_params &params, std::vector<std::vector<std::string>> &result) {
if (params.no_prints) {
whisper_log_set(cb_log_disable, NULL);
}
if (params.fname_inp.empty() && params.pcmf32.empty()) {
fprintf(stderr, "error: no input files or audio buffer specified\n");
return 2;
}
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
exit(0);
}
// whisper init
struct whisper_context_params cparams = whisper_context_default_params();
cparams.use_gpu = params.use_gpu;
cparams.flash_attn = params.flash_attn;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
if (ctx == nullptr) {
fprintf(stderr, "error: failed to initialize whisper context\n");
return 3;
}
// If params.pcmf32 provides, set params.fname_inp as "buffer"
if (!params.pcmf32.empty()) {
fprintf(stderr, "info: using audio buffer as input\n");
params.fname_inp.clear();
params.fname_inp.emplace_back("buffer");
}
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
const auto fname_inp = params.fname_inp[f];
const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
// If params.pcmf32 is empty, read input audio file
if (params.pcmf32.empty()) {
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
continue;
}
} else {
pcmf32 = params.pcmf32;
}
// Print system info
if (!params.no_prints) {
fprintf(stderr, "\n");
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
}
// Print processing info
if (!params.no_prints) {
fprintf(stderr, "\n");
if (!whisper_is_multilingual(ctx)) {
if (params.language != "en" || params.translate) {
params.language = "en";
params.translate = false;
fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
}
}
fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d, audio_ctx = %d ...\n",
__func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
params.n_threads, params.n_processors,
params.language.c_str(),
params.translate ? "translate" : "transcribe",
params.no_timestamps ? 0 : 1,
params.audio_ctx);
fprintf(stderr, "\n");
}
// Run inference
{
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
wparams.print_realtime = false;
wparams.print_progress = params.print_progress;
wparams.print_timestamps = !params.no_timestamps;
wparams.print_special = params.print_special;
wparams.translate = params.translate;
wparams.language = params.language.c_str();
wparams.n_threads = params.n_threads;
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
wparams.offset_ms = params.offset_t_ms;
wparams.duration_ms = params.duration_ms;
wparams.token_timestamps = params.output_wts || params.max_len > 0;
wparams.thold_pt = params.word_thold;
wparams.entropy_thold = params.entropy_thold;
wparams.logprob_thold = params.logprob_thold;
wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
wparams.audio_ctx = params.audio_ctx;
wparams.greedy.best_of = params.best_of;
wparams.beam_search.beam_size = params.beam_size;
wparams.initial_prompt = params.prompt.c_str();
wparams.no_timestamps = params.no_timestamps;
whisper_print_user_data user_data = { &params, &pcmf32s };
// This callback is called for each new segment
if (!wparams.print_realtime) {
wparams.new_segment_callback = whisper_print_segment_callback;
wparams.new_segment_callback_user_data = &user_data;
}
// Set progress callback
wparams.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
ProgressWorker* worker = static_cast<ProgressWorker*>(user_data);
worker->OnProgress(progress);
};
wparams.progress_callback_user_data = this;
// Abort mechanism example
{
static bool is_aborted = false; // Note: this should be atomic to avoid data races
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
bool is_aborted = *(bool*)user_data;
return !is_aborted;
};
wparams.encoder_begin_callback_user_data = &is_aborted;
}
if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
fprintf(stderr, "failed to process audio\n");
return 10;
}
}
}
const int n_segments = whisper_full_n_segments(ctx);
result.resize(n_segments);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
result[i].emplace_back(text);
}
whisper_print_timings(ctx);
whisper_free(ctx);
return 0;
}
whisper_params params;
std::vector<std::vector<std::string>> result;
};
Napi::Value whisper(const Napi::CallbackInfo& info) {
Napi::Env env = info.Env();
if (info.Length() <= 0 || !info[0].IsObject()) {
@ -367,23 +331,6 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
int32_t audio_ctx = whisper_params.Get("audio_ctx").As<Napi::Number>();
bool comma_in_time = whisper_params.Get("comma_in_time").As<Napi::Boolean>();
int32_t max_len = whisper_params.Get("max_len").As<Napi::Number>();
// support prompt
std::string prompt = "";
if (whisper_params.Has("prompt") && whisper_params.Get("prompt").IsString()) {
prompt = whisper_params.Get("prompt").As<Napi::String>();
}
// Add support for print_progress
bool print_progress = false;
if (whisper_params.Has("print_progress")) {
print_progress = whisper_params.Get("print_progress").As<Napi::Boolean>();
}
// Add support for progress_callback
Napi::Function progress_callback;
if (whisper_params.Has("progress_callback") && whisper_params.Get("progress_callback").IsFunction()) {
progress_callback = whisper_params.Get("progress_callback").As<Napi::Function>();
}
Napi::Value pcmf32Value = whisper_params.Get("pcmf32");
std::vector<float> pcmf32_vec;
@ -407,12 +354,9 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
params.pcmf32 = pcmf32_vec;
params.comma_in_time = comma_in_time;
params.max_len = max_len;
params.print_progress = print_progress;
params.prompt = prompt;
Napi::Function callback = info[1].As<Napi::Function>();
// Create a new Worker class with progress callback support
ProgressWorker* worker = new ProgressWorker(callback, params, progress_callback, env);
Worker* worker = new Worker(callback, params);
worker->Queue();
return env.Undefined();
}

View File

@ -19,9 +19,6 @@ const whisperParams = {
no_timestamps: false,
audio_ctx: 0,
max_len: 0,
progress_callback: (progress) => {
console.log(`progress: ${progress}%`);
}
};
const arguments = process.argv.slice(2);

View File

@ -2,7 +2,7 @@
Benchmark the performance of whisper.cpp in the browser using WebAssembly
Link: https://ggerganov.github.io/whisper.cpp/bench.wasm
Link: https://whisper.ggerganov.com/bench/
Terminal version: [examples/bench](/examples/bench)
@ -15,17 +15,7 @@ cd whisper.cpp
mkdir build-em && cd build-em
emcmake cmake ..
make -j
```
The example can then be started by running a local HTTP server:
```console
python3 examples/server.py
```
And then opening a browser to the following URL:
http://localhost:8000/bench.wasm
To run the example in a different server, you need to copy the following files
to the server's HTTP path:
```
# copy the produced page to your HTTP path
cp bin/bench.wasm/* /path/to/html/
cp bin/libbench.worker.js /path/to/html/

View File

@ -24,8 +24,6 @@
overflow-x: scroll;
}
</style>
<script src="../coi-serviceworker.js"></script>
<link rel="icon" href="data:,">
</head>
<body>
<div id="main-container">
@ -38,10 +36,11 @@
<br><br>
<b>More examples:</b>
<a href="../">main</a> |
<a href="../bench.wasm/">bench</a> |
<a href="../stream.wasm">stream</a> |
<a href="../command.wasm/">command</a> |
<a href="https://whisper.ggerganov.com/">main</a> |
<a href="https://whisper.ggerganov.com/bench">bench</a> |
<a href="https://whisper.ggerganov.com/stream">stream</a> |
<a href="https://whisper.ggerganov.com/command">command</a> |
<a href="https://whisper.ggerganov.com/talk">talk</a> |
<br><br>

View File

@ -50,11 +50,11 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, "\n");
}

View File

@ -1,5 +1,4 @@
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
#include "grammar-parser.h"
@ -7,15 +6,14 @@
#include <cmath>
#include <fstream>
#include <cstdio>
#include <regex>
#include <string>
#include <thread>
#include <vector>
#include <cstring>
#if defined(_WIN32)
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
@ -201,8 +199,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options] file0 file1 ...\n", argv[0]);
fprintf(stderr, "supported audio formats: flac, mp3, ogg, wav\n");
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
@ -247,7 +244,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
fprintf(stderr, " -dl, --detect-language [%-7s] exit after automatically detecting language\n", params.detect_language ? "true" : "false");
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt (max n_text_ctx/2 tokens)\n", params.prompt.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input audio file path\n", "");
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
@ -1072,8 +1069,8 @@ int main(int argc, char ** argv) {
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
continue;
}

View File

@ -1,146 +0,0 @@
/*! coi-serviceworker v0.1.7 - Guido Zuidhof and contributors, licensed under MIT */
let coepCredentialless = false;
if (typeof window === 'undefined') {
self.addEventListener("install", () => self.skipWaiting());
self.addEventListener("activate", (event) => event.waitUntil(self.clients.claim()));
self.addEventListener("message", (ev) => {
if (!ev.data) {
return;
} else if (ev.data.type === "deregister") {
self.registration
.unregister()
.then(() => {
return self.clients.matchAll();
})
.then(clients => {
clients.forEach((client) => client.navigate(client.url));
});
} else if (ev.data.type === "coepCredentialless") {
coepCredentialless = ev.data.value;
}
});
self.addEventListener("fetch", function (event) {
const r = event.request;
if (r.cache === "only-if-cached" && r.mode !== "same-origin") {
return;
}
const request = (coepCredentialless && r.mode === "no-cors")
? new Request(r, {
credentials: "omit",
})
: r;
event.respondWith(
fetch(request)
.then((response) => {
if (response.status === 0) {
return response;
}
const newHeaders = new Headers(response.headers);
newHeaders.set("Cross-Origin-Embedder-Policy",
coepCredentialless ? "credentialless" : "require-corp"
);
if (!coepCredentialless) {
newHeaders.set("Cross-Origin-Resource-Policy", "cross-origin");
}
newHeaders.set("Cross-Origin-Opener-Policy", "same-origin");
return new Response(response.body, {
status: response.status,
statusText: response.statusText,
headers: newHeaders,
});
})
.catch((e) => console.error(e))
);
});
} else {
(() => {
const reloadedBySelf = window.sessionStorage.getItem("coiReloadedBySelf");
window.sessionStorage.removeItem("coiReloadedBySelf");
const coepDegrading = (reloadedBySelf == "coepdegrade");
// You can customize the behavior of this script through a global `coi` variable.
const coi = {
shouldRegister: () => !reloadedBySelf,
shouldDeregister: () => false,
coepCredentialless: () => true,
coepDegrade: () => true,
doReload: () => window.location.reload(),
quiet: false,
...window.coi
};
const n = navigator;
const controlling = n.serviceWorker && n.serviceWorker.controller;
// Record the failure if the page is served by serviceWorker.
if (controlling && !window.crossOriginIsolated) {
window.sessionStorage.setItem("coiCoepHasFailed", "true");
}
const coepHasFailed = window.sessionStorage.getItem("coiCoepHasFailed");
if (controlling) {
// Reload only on the first failure.
const reloadToDegrade = coi.coepDegrade() && !(
coepDegrading || window.crossOriginIsolated
);
n.serviceWorker.controller.postMessage({
type: "coepCredentialless",
value: (reloadToDegrade || coepHasFailed && coi.coepDegrade())
? false
: coi.coepCredentialless(),
});
if (reloadToDegrade) {
!coi.quiet && console.log("Reloading page to degrade COEP.");
window.sessionStorage.setItem("coiReloadedBySelf", "coepdegrade");
coi.doReload("coepdegrade");
}
if (coi.shouldDeregister()) {
n.serviceWorker.controller.postMessage({ type: "deregister" });
}
}
// If we're already coi: do nothing. Perhaps it's due to this script doing its job, or COOP/COEP are
// already set from the origin server. Also if the browser has no notion of crossOriginIsolated, just give up here.
if (window.crossOriginIsolated !== false || !coi.shouldRegister()) return;
if (!window.isSecureContext) {
!coi.quiet && console.log("COOP/COEP Service Worker not registered, a secure context is required.");
return;
}
// In some environments (e.g. Firefox private mode) this won't be available
if (!n.serviceWorker) {
!coi.quiet && console.error("COOP/COEP Service Worker not registered, perhaps due to private mode.");
return;
}
n.serviceWorker.register(window.document.currentScript.src).then(
(registration) => {
!coi.quiet && console.log("COOP/COEP Service Worker registered", registration.scope);
registration.addEventListener("updatefound", () => {
!coi.quiet && console.log("Reloading page to make use of updated COOP/COEP Service Worker.");
window.sessionStorage.setItem("coiReloadedBySelf", "updatefound");
coi.doReload();
});
// If the registration is active, but it's not controlling the page
if (registration.active && !n.serviceWorker.controller) {
!coi.quiet && console.log("Reloading page to make use of COOP/COEP Service Worker.");
window.sessionStorage.setItem("coiReloadedBySelf", "notcontrolling");
coi.doReload();
}
},
(err) => {
!coi.quiet && console.error("COOP/COEP Service Worker failed to register:", err);
}
);
})();
}

View File

@ -3,7 +3,7 @@
This is a basic Voice Assistant example that accepts voice commands from the microphone.
It runs in fully in the browser via WebAseembly.
Online demo: https://ggerganov.github.io/whisper.cpp/command.wasm
Online demo: https://whisper.ggerganov.com/command/
Terminal version: [examples/command](/examples/command)
@ -15,18 +15,9 @@ git clone https://github.com/ggerganov/whisper.cpp
cd whisper.cpp
mkdir build-em && cd build-em
emcmake cmake ..
make -j libcommand
```
The example can then be started by running a local HTTP server:
```console
python3 examples/server.py
```
And then opening a browser to the following URL:
http://localhost:8000/command.wasm/
make -j
To run the example in a different server, you need to copy the following files
to the server's HTTP path:
```
# copy the produced page to your HTTP path
cp bin/command.wasm/* /path/to/html/
cp bin/libcommand.worker.js /path/to/html/
```

View File

@ -24,8 +24,6 @@
overflow-x: scroll;
}
</style>
<script src="../coi-serviceworker.js"></script>
<link rel="icon" href="data:,">
</head>
<body>
<div id="main-container">
@ -38,10 +36,11 @@
<br><br>
<b>More examples:</b>
<a href="../">main</a> |
<a href="../bench.wasm/">bench</a> |
<a href="../stream.wasm">stream</a> |
<a href="../command.wasm/">command</a> |
<a href="https://whisper.ggerganov.com/">main</a> |
<a href="https://whisper.ggerganov.com/bench">bench</a> |
<a href="https://whisper.ggerganov.com/stream">stream</a> |
<a href="https://whisper.ggerganov.com/command">command</a> |
<a href="https://whisper.ggerganov.com/talk">talk</a> |
<br><br>

View File

@ -11,15 +11,22 @@
#include "whisper.h"
#include "grammar-parser.h"
#include <algorithm>
#include <chrono>
#include <sstream>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <map>
#include <sstream>
#include <mutex>
#include <regex>
#include <string>
#include <thread>
#include <vector>
#include <map>
#include <chrono>
#if defined(_WIN32)
#define NOMINMAX
#include <windows.h>
#endif
// command-line parameters
struct whisper_params {
@ -678,6 +685,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
}
int main(int argc, char ** argv) {
#if defined(_WIN32)
SetConsoleOutputCP(CP_UTF8);
#endif
whisper_params params;
if (whisper_params_parse(argc, argv, params) == false) {

View File

@ -159,11 +159,15 @@ void audio_async::callback(uint8_t * stream, int len) {
memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = m_audio.size();
} else {
memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
}
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
}
}

View File

@ -1,172 +0,0 @@
#define _USE_MATH_DEFINES // for M_PI
#include "common-whisper.h"
#include "common.h"
#include "whisper.h"
// third-party utilities
// use your favorite implementations
#define STB_VORBIS_HEADER_ONLY
#include "stb_vorbis.c" /* Enables Vorbis decoding. */
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#endif
#define MA_NO_DEVICE_IO
#define MA_NO_THREADING
#define MA_NO_ENCODING
#define MA_NO_GENERATION
#define MA_NO_RESOURCE_MANAGER
#define MA_NO_NODE_GRAPH
#define MINIAUDIO_IMPLEMENTATION
#include "miniaudio.h"
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
#include <cstring>
#include <fstream>
#ifdef WHISPER_FFMPEG
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
#endif
bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
std::vector<uint8_t> audio_data; // used for pipe input from stdin or ffmpeg decoding output
ma_result result;
ma_decoder_config decoder_config;
ma_decoder decoder;
decoder_config = ma_decoder_config_init(ma_format_f32, stereo ? 2 : 1, WHISPER_SAMPLE_RATE);
if (fname == "-") {
#ifdef _WIN32
_setmode(_fileno(stdin), _O_BINARY);
#endif
uint8_t buf[1024];
while (true)
{
const size_t n = fread(buf, 1, sizeof(buf), stdin);
if (n == 0) {
break;
}
audio_data.insert(audio_data.end(), buf, buf + n);
}
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
fprintf(stderr, "Error: failed to open audio data from stdin (%s)\n", ma_result_description(result));
return false;
}
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, audio_data.size());
}
else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
#if defined(WHISPER_FFMPEG)
if (ffmpeg_decode_audio(fname, audio_data) != 0) {
fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());
return false;
}
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
return false;
}
#else
if ((result = ma_decoder_init_memory(fname.c_str(), fname.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
return false;
}
#endif
}
ma_uint64 frame_count;
ma_uint64 frames_read;
if ((result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to retrieve the length of the audio data (%s)\n", ma_result_description(result));
return false;
}
pcmf32.resize(stereo ? frame_count*2 : frame_count);
if ((result = ma_decoder_read_pcm_frames(&decoder, pcmf32.data(), frame_count, &frames_read)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to read the frames of the audio data (%s)\n", ma_result_description(result));
return false;
}
if (stereo) {
pcmf32s.resize(2);
pcmf32s[0].resize(frame_count);
pcmf32s[1].resize(frame_count);
for (uint64_t i = 0; i < frame_count; i++) {
pcmf32s[0][i] = pcmf32[2*i];
pcmf32s[1][i] = pcmf32[2*i + 1];
}
}
ma_decoder_uninit(&decoder);
return true;
}
// 500 -> 00:05.000
// 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma) {
int64_t msec = t * 10;
int64_t hr = msec / (1000 * 60 * 60);
msec = msec - hr * (1000 * 60 * 60);
int64_t min = msec / (1000 * 60);
msec = msec - min * (1000 * 60);
int64_t sec = msec / 1000;
msec = msec - sec * 1000;
char buf[32];
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
return std::string(buf);
}
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
}
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id) {
std::ofstream speak_file(path.c_str());
if (speak_file.fail()) {
fprintf(stderr, "%s: failed to open speak_file\n", __func__);
return false;
} else {
speak_file.write(text.c_str(), text.size());
speak_file.close();
int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
if (ret != 0) {
fprintf(stderr, "%s: failed to speak\n", __func__);
return false;
}
}
return true;
}
#undef STB_VORBIS_HEADER_ONLY
#include "stb_vorbis.c"

View File

@ -1,24 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <cstdint>
// Read WAV audio file and store the PCM data into pcmf32
// fname can be a buffer of WAV data instead of a filename
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_audio_data(
const std::string & fname,
std::vector<float> & pcmf32,
std::vector<std::vector<float>> & pcmf32s,
bool stereo);
// convert timestamp to string, 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma = false);
// given a timestamp get the sample
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
// write text to file, and call system("command voice_id file")
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);

View File

@ -2,18 +2,33 @@
#include "common.h"
// third-party utilities
// use your favorite implementations
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include <cmath>
#include <codecvt>
#include <cstring>
#include <fstream>
#include <locale>
#include <regex>
#include <locale>
#include <codecvt>
#include <sstream>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
#ifdef WHISPER_FFMPEG
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
#endif
// Function to check if the next argument exists
static std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
if (i + 1 < argc && argv[i + 1][0] != '-') {
@ -247,6 +262,17 @@ std::map<std::string, int32_t> json_parse(const std::string & fname) {
return result;
}
std::string convert_to_utf8(const std::wstring & input) {
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
return converter.to_bytes(input);
}
std::wstring convert_to_wstring(const std::string & input) {
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
return converter.from_bytes(input);
}
void gpt_split_words(std::string str, std::vector<std::string>& words) {
const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
const std::regex re(pattern);
@ -598,6 +624,129 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
}
bool is_wav_buffer(const std::string buf) {
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
if (buf.size() < 12 || buf.substr(0, 4) != "RIFF" || buf.substr(8, 4) != "WAVE") {
return false;
}
uint32_t chunk_size = *reinterpret_cast<const uint32_t*>(buf.data() + 4);
if (chunk_size + 8 != buf.size()) {
return false;
}
return true;
}
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
drwav wav;
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
if (fname == "-") {
{
#ifdef _WIN32
_setmode(_fileno(stdin), _O_BINARY);
#endif
uint8_t buf[1024];
while (true)
{
const size_t n = fread(buf, 1, sizeof(buf), stdin);
if (n == 0) {
break;
}
wav_data.insert(wav_data.end(), buf, buf + n);
}
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to open WAV file from stdin\n");
return false;
}
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
}
else if (is_wav_buffer(fname)) {
if (drwav_init_memory(&wav, fname.c_str(), fname.size(), nullptr) == false) {
fprintf(stderr, "error: failed to open WAV file from fname buffer\n");
return false;
}
}
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
#if defined(WHISPER_FFMPEG)
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
return false;
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to read wav data as wav \n");
return false;
}
#else
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
return false;
#endif
}
if (wav.channels != 1 && wav.channels != 2) {
fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
drwav_uninit(&wav);
return false;
}
if (stereo && wav.channels != 2) {
fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
drwav_uninit(&wav);
return false;
}
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
drwav_uninit(&wav);
return false;
}
if (wav.bitsPerSample != 16) {
fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
drwav_uninit(&wav);
return false;
}
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
std::vector<int16_t> pcm16;
pcm16.resize(n*wav.channels);
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
drwav_uninit(&wav);
// convert to mono, float
pcmf32.resize(n);
if (wav.channels == 1) {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[i])/32768.0f;
}
} else {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
}
}
if (stereo) {
// convert to stereo, float
pcmf32s.resize(2);
pcmf32s[0].resize(n);
pcmf32s[1].resize(n);
for (uint64_t i = 0; i < n; i++) {
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
}
}
return true;
}
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
const float rc = 1.0f / (2.0f * M_PI * cutoff);
const float dt = 1.0f / sample_rate;
@ -673,7 +822,90 @@ float similarity(const std::string & s0, const std::string & s1) {
return 1.0f - (dist / std::max(s0.size(), s1.size()));
}
bool is_file_exist(const char * filename) {
std::ifstream infile(filename);
bool sam_params_parse(int argc, char ** argv, sam_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-s" || arg == "--seed") {
params.seed = std::stoi(argv[++i]);
} else if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-m" || arg == "--model") {
params.model = argv[++i];
} else if (arg == "-i" || arg == "--inp") {
params.fname_inp = argv[++i];
} else if (arg == "-o" || arg == "--out") {
params.fname_out = argv[++i];
} else if (arg == "-h" || arg == "--help") {
sam_print_usage(argc, argv, params);
exit(0);
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
sam_print_usage(argc, argv, params);
exit(0);
}
}
return true;
}
void sam_print_usage(int /*argc*/, char ** argv, const sam_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help show this help message and exit\n");
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n");
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
fprintf(stderr, " -i FNAME, --inp FNAME\n");
fprintf(stderr, " input file (default: %s)\n", params.fname_inp.c_str());
fprintf(stderr, " -o FNAME, --out FNAME\n");
fprintf(stderr, " output file (default: %s)\n", params.fname_out.c_str());
fprintf(stderr, "\n");
}
// 500 -> 00:05.000
// 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma) {
int64_t msec = t * 10;
int64_t hr = msec / (1000 * 60 * 60);
msec = msec - hr * (1000 * 60 * 60);
int64_t min = msec / (1000 * 60);
msec = msec - min * (1000 * 60);
int64_t sec = msec / 1000;
msec = msec - sec * 1000;
char buf[32];
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
return std::string(buf);
}
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
}
bool is_file_exist(const char *fileName)
{
std::ifstream infile(fileName);
return infile.good();
}
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id)
{
std::ofstream speak_file(path.c_str());
if (speak_file.fail()) {
fprintf(stderr, "%s: failed to open speak_file\n", __func__);
return false;
} else {
speak_file.write(text.c_str(), text.size());
speak_file.close();
int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
if (ret != 0) {
fprintf(stderr, "%s: failed to speak\n", __func__);
return false;
}
}
return true;
}

View File

@ -11,6 +11,8 @@
#include <fstream>
#include <sstream>
#define COMMON_SAMPLE_RATE 16000
//
// GPT CLI argument parsing
//
@ -134,6 +136,19 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
// Audio utils
//
// Check if a buffer is a WAV audio file
bool is_wav_buffer(const std::string buf);
// Read WAV audio file and store the PCM data into pcmf32
// fname can be a buffer of WAV data instead of a filename
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_wav(
const std::string & fname,
std::vector<float> & pcmf32,
std::vector<std::vector<float>> & pcmf32s,
bool stereo);
// Write PCM data into WAV audio file
class wav_writer {
private:
@ -251,6 +266,23 @@ bool vad_simple(
// compute similarity between two strings using Levenshtein distance
float similarity(const std::string & s0, const std::string & s1);
//
// SAM argument parsing
//
struct sam_params {
int32_t seed = -1; // RNG seed
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
std::string model = "models/sam-vit-b/ggml-model-f16.bin"; // model path
std::string fname_inp = "img.jpg";
std::string fname_out = "img.out";
};
bool sam_params_parse(int argc, char ** argv, sam_params & params);
void sam_print_usage(int argc, char ** argv, const sam_params & params);
//
// Terminal utils
//
@ -298,5 +330,14 @@ const std::vector<std::string> k_colors = {
// Other utils
//
// convert timestamp to string, 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma = false);
// given a timestamp get the sample
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
// check if file exists using ifstream
bool is_file_exist(const char * filename);
bool is_file_exist(const char *fileName);
// write text to file, and call system("command voice_id file")
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);

View File

@ -1,6 +1,4 @@
add_executable(main ./deprecation-warning.cpp)
add_executable(bench ./deprecation-warning.cpp)
if (WHISPER_SDL2)
add_executable(stream ./deprecation-warning.cpp)
add_executable(command ./deprecation-warning.cpp)
endif()
add_executable(stream ./deprecation-warning.cpp)
add_executable(command ./deprecation-warning.cpp)

8815
examples/dr_wav.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -41,17 +41,20 @@ fi
# record some raw audio
sox -d rec.wav
# resample to 16kHz
ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>&1
# run Whisper
echo "Processing ..."
${executable} -m models/ggml-base.en.bin rec.wav -owts > /dev/null 2>&1
${executable} -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1
# generate Karaoke video
echo "Generating video ..."
source rec.wav.wts > /dev/null 2>&1
source rec16.wav.wts > /dev/null 2>&1
# play the video
echo "Playing ./rec16.wav.mp4 ..."
ffplay -loglevel 0 -autoexit ./rec.wav.mp4
ffplay -loglevel 0 -autoexit ./rec16.wav.mp4
echo "Done"
exit 0

View File

@ -3,15 +3,15 @@
#include "whisper.h"
#include "json.hpp"
#include <cassert>
#include <chrono>
#include <cstdio>
#include <deque>
#include <iostream>
#include <set>
#include <cassert>
#include <cstdio>
#include <string>
#include <thread>
#include <vector>
#include <deque>
#include <set>
#include <chrono>
using json = nlohmann::json;

File diff suppressed because it is too large Load Diff

View File

@ -1,39 +0,0 @@
import http.server
import socketserver
import os
from pathlib import Path
SCRIPT_DIR = Path(__file__).parent.absolute()
DIRECTORY = os.path.join(SCRIPT_DIR, "../build-em/bin")
DIRECTORY = os.path.abspath(DIRECTORY)
class CustomHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
def __init__(self, *args, **kwargs):
super().__init__(*args, directory=DIRECTORY, **kwargs)
def do_GET(self):
# If requesting a worker file from any subdirectory
if '.worker.js' in self.path:
worker_file = os.path.basename(self.path)
worker_path = os.path.join(DIRECTORY, worker_file)
if os.path.exists(worker_path):
self.path = '/' + worker_file
return super().do_GET()
def end_headers(self):
# Add required headers for SharedArrayBuffer
self.send_header("Cross-Origin-Opener-Policy", "same-origin")
self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
self.send_header("Access-Control-Allow-Origin", "*");
super().end_headers()
PORT = 8000
with socketserver.TCPServer(("", PORT), CustomHTTPRequestHandler) as httpd:
print(f"Serving directory '{DIRECTORY}' at http://localhost:{PORT}")
try:
httpd.serve_forever()
except KeyboardInterrupt:
print("\nServer stopped.")

View File

@ -1,18 +1,18 @@
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
#include "httplib.h"
#include "json.hpp"
#include <chrono>
#include <cmath>
#include <cstdio>
#include <fstream>
#include <sstream>
#include <cstdio>
#include <string>
#include <thread>
#include <vector>
#include <cstring>
#include <sstream>
#include <chrono>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
@ -723,8 +723,8 @@ int main(int argc, char ** argv) {
return;
}
// read audio content into pcmf32
if (!::read_audio_data(temp_filename, pcmf32, pcmf32s, params.diarize))
// read wav content into pcmf32
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize))
{
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
@ -735,10 +735,10 @@ int main(int argc, char ** argv) {
// remove temp file
std::remove(temp_filename.c_str());
} else {
if (!::read_audio_data(audio_file.content, pcmf32, pcmf32s, params.diarize))
if (!::read_wav(audio_file.content, pcmf32, pcmf32s, params.diarize))
{
fprintf(stderr, "error: failed to read audio data\n");
const std::string error_resp = "{\"error\":\"failed to read audio data\"}";
fprintf(stderr, "error: failed to read WAV file\n");
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
res.set_content(error_resp, "application/json");
return;
}
@ -1024,11 +1024,6 @@ int main(int argc, char ** argv) {
// check if the model is in the file system
});
svr.Get(sparams.request_path + "/health", [&](const Request &, Response &res){
const std::string health_response = "{\"status\":\"ok\"}";
res.set_content(health_response, "application/json");
});
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
const char fmt[] = "500 Internal Server Error\n%s";
char buf[BUFSIZ];

File diff suppressed because it is too large Load Diff

View File

@ -13,17 +13,7 @@ cd whisper.cpp
mkdir build-em && cd build-em
emcmake cmake ..
make -j
```
The example can then be started by running a local HTTP server:
```console
python3 examples/server.py
```
And then opening a browser to the following URL:
http://localhost:8000/stream.wasm
To run the example in a different server, you need to copy the following files
to the server's HTTP path:
```
# copy the produced page to your HTTP path
cp bin/stream.wasm/* /path/to/html/
cp bin/libstream.worker.js /path/to/html/

View File

@ -24,8 +24,6 @@
overflow-x: scroll;
}
</style>
<script src="../coi-serviceworker.js"></script>
<link rel="icon" href="data:,">
</head>
<body>
<div id="main-container">
@ -38,10 +36,11 @@
<br><br>
<b>More examples:</b>
<a href="../">main</a> |
<a href="../bench.wasm/">bench</a> |
<a href="../stream.wasm">stream</a> |
<a href="../command.wasm/">command</a> |
<a href="https://whisper.ggerganov.com/">main</a> |
<a href="https://whisper.ggerganov.com/bench">bench</a> |
<a href="https://whisper.ggerganov.com/stream">stream</a> |
<a href="https://whisper.ggerganov.com/command">command</a> |
<a href="https://whisper.ggerganov.com/talk">talk</a> |
<br><br>

View File

@ -4,15 +4,20 @@
//
#include "common-sdl.h"
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
#include <chrono>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <string>
#include <thread>
#include <vector>
#include <fstream>
#include <chrono>
#if defined(_WIN32)
#define NOMINMAX
#include <windows.h>
#endif
// command-line parameters
struct whisper_params {
@ -23,7 +28,6 @@ struct whisper_params {
int32_t capture_id = -1;
int32_t max_tokens = 32;
int32_t audio_ctx = 0;
int32_t beam_size = -1;
float vad_thold = 0.6f;
float freq_thold = 100.0f;
@ -60,7 +64,6 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
@ -98,7 +101,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
@ -116,6 +118,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
}
int main(int argc, char ** argv) {
#if defined(_WIN32)
SetConsoleOutputCP(CP_UTF8);
#endif
whisper_params params;
if (whisper_params_parse(argc, argv, params) == false) {
@ -160,6 +166,7 @@ int main(int argc, char ** argv) {
cparams.use_gpu = params.use_gpu;
cparams.flash_attn = params.flash_attn;
fprintf(stderr, "whisper_init_from_file_with_params ...\n");
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
std::vector<float> pcmf32 (n_samples_30s, 0.0f);
@ -169,6 +176,8 @@ int main(int argc, char ** argv) {
std::vector<whisper_token> prompt_tokens;
// print some info about the processing
fprintf(stderr, "whisper_init_from_file_with_params ok\n");
{
fprintf(stderr, "\n");
if (!whisper_is_multilingual(ctx)) {
@ -244,11 +253,6 @@ int main(int argc, char ** argv) {
if (!use_vad) {
while (true) {
// handle Ctrl + C
is_running = sdl_poll_events();
if (!is_running) {
break;
}
audio.get(params.step_ms, pcmf32_new);
if ((int) pcmf32_new.size() > 2*n_samples_step) {
@ -306,7 +310,7 @@ int main(int argc, char ** argv) {
// run the inference
{
whisper_full_params wparams = whisper_full_default_params(params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY);
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
wparams.print_progress = false;
wparams.print_special = params.print_special;
@ -317,7 +321,6 @@ int main(int argc, char ** argv) {
wparams.max_tokens = params.max_tokens;
wparams.language = params.language.c_str();
wparams.n_threads = params.n_threads;
wparams.beam_search.beam_size = params.beam_size;
wparams.audio_ctx = params.audio_ctx;

View File

@ -25,7 +25,10 @@ if (WHISPER_SDL2)
unicode-data.cpp)
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
if (WHISPER_CLBLAST)
set(CLBLAST_LIBNAME clblast)
endif ()
target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CLBLAST_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
if(WIN32)
# It requires Windows 8.1 or later for PrefetchVirtualMemory

View File

@ -3,19 +3,24 @@
#include "common-sdl.h"
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
#include "llama.h"
#include <chrono>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <regex>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include <regex>
#include <sstream>
#include <chrono>
#if defined(_WIN32)
#define NOMINMAX
#include <windows.h>
#endif
static std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
const llama_model * model = llama_get_model(ctx);
@ -273,6 +278,10 @@ The transcript only includes text, it does not include markup like HTML and Mark
{0}{4})";
int main(int argc, char ** argv) {
#if defined(_WIN32)
SetConsoleOutputCP(CP_UTF8);
#endif
whisper_params params;
if (whisper_params_parse(argc, argv, params) == false) {

View File

@ -2,6 +2,7 @@
#include "Chessboard.h"
#include "grammar-parser.h"
#include "common.h"
#include <thread>
#include <chrono>
WChess::WChess(whisper_context * ctx,

View File

@ -2,23 +2,15 @@ cmake_minimum_required(VERSION 3.10)
project(whisper.cpp)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 11)
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)
set(SOURCE_FILES
${WHISPER_LIB_DIR}/ggml/src/ggml.c
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.c
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/unary-ops.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/binary-ops.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-backend-reg.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
${WHISPER_LIB_DIR}/ggml/src/ggml-threading.cpp
${WHISPER_LIB_DIR}/src/whisper.cpp
${CMAKE_SOURCE_DIR}/jni.c
)
@ -33,7 +25,6 @@ function(build_library target_name)
)
target_link_libraries(${target_name} ${LOG_LIB} android)
target_compile_definitions(${target_name} PUBLIC GGML_USE_CPU)
if (${target_name} STREQUAL "whisper_v8fp16_va")
target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
@ -66,4 +57,3 @@ include_directories(${WHISPER_LIB_DIR}/src)
include_directories(${WHISPER_LIB_DIR}/include)
include_directories(${WHISPER_LIB_DIR}/ggml/include)
include_directories(${WHISPER_LIB_DIR}/ggml/src)
include_directories(${WHISPER_LIB_DIR}/ggml/src/ggml-cpu)

View File

@ -16,10 +16,9 @@ allprojects {
repositories {
google()
jcenter()
maven { url "https://maven.aliyun.com/repository/gradle-plugin" }
}
}
task clean(type: Delete) {
delete rootProject.buildDir
}
}

0
examples/whisper.android.java/gradlew vendored Executable file → Normal file
View File

View File

@ -32,8 +32,6 @@ if (NOT GGML_HOME)
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/unary-ops.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/binary-ops.cpp
)
endif()
@ -46,8 +44,6 @@ function(build_library target_name)
${SOURCE_FILES}
)
target_compile_definitions(${target_name} PUBLIC GGML_USE_CPU)
if (${target_name} STREQUAL "whisper_v8fp16_va")
target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
set(GGML_COMPILE_OPTIONS -march=armv8.2-a+fp16)

View File

@ -11,23 +11,39 @@ https://user-images.githubusercontent.com/1991296/204126266-ce4177c6-6eca-4bd9-b
## Usage
This example uses the whisper.xcframework which needs to be built first using the following command:
```bash
./build_xcframework.sh
```
git clone https://github.com/ggerganov/whisper.cpp
open whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/
A model is also required to be downloaded and can be done using the following command:
```bash
./models/download-ggml-model.sh base.en
```
If you don't want to convert a Core ML model, you can skip this step by creating dummy model:
```bash
# if you don't want to convert a Core ML model, you can skip this step by create dummy model
mkdir models/ggml-base.en-encoder.mlmodelc
```
Make sure to build the project in `Release`:
<img width="947" alt="image" src="https://user-images.githubusercontent.com/1991296/197382607-9e1e6d1b-79fa-496f-9d16-b71dc1535701.png">
Also, don't forget to add the `-DGGML_USE_ACCELERATE` compiler flag for `ggml.c` in Build Phases.
This can significantly improve the performance of the transcription:
<img width="1072" alt="image" src="https://user-images.githubusercontent.com/1991296/208511239-8d7cdbd1-aa48-41b5-becd-ca288d53cc07.png">
## Core ML
Follow the [`Core ML support` section of readme](../../README.md#core-ml-support) to convert the model.
That is all the needs to be done to use the Core ML model in the app. The converted model is a
resource in the project and will be used if it is available.
If you want to enable Core ML support, you can add the `-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK` compiler flag for `whisper.cpp` in Build Phases:
<img width="1072" alt="image" src="https://github.com/ggerganov/whisper.cpp/assets/3001525/103e8f57-6eb6-490d-a60c-f6cf6c319324">
Then follow the [`Core ML support` section of readme](../../README.md#core-ml-support) for convert the model.
In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project.
## Metal
You can also enable Metal to make the inference run on the GPU of your device. This might or might not be more efficient
compared to Core ML depending on the model and device that you use.
To enable Metal, just add `-DGGML_USE_METAL` instead off the `-DWHISPER_USE_COREML` flag and you are ready.
This will make both the Encoder and the Decoder run on the GPU.
If you want to run the Encoder with Core ML and the Decoder with Metal then simply add both `-DWHISPER_USE_COREML -DGGML_USE_METAL` flags. That's all!

View File

@ -7,6 +7,8 @@
objects = {
/* Begin PBXBuildFile section */
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
@ -14,12 +16,25 @@
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */; };
18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */; };
18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.cpp */; settings = {COMPILER_FLAGS = "-x c++"; }; };
18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */; };
18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */; };
433188B82D3A187C00E3FE79 /* gguf.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 433188B72D3A187C00E3FE79 /* gguf.cpp */; };
437B63E22D36280C002A49EC /* ggml-cpu-traits.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 437B63E12D36280C002A49EC /* ggml-cpu-traits.cpp */; };
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */ = {isa = PBXBuildFile; fileRef = 7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */; };
DDE3609F2D87EA8C004EA223 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DDE3609E2D87EA8C004EA223 /* whisper.xcframework */; };
DDE360A02D87EA8C004EA223 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = DDE3609E2D87EA8C004EA223 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@ -29,24 +44,18 @@
dstPath = "";
dstSubfolderSpec = 7;
files = (
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */,
);
name = "Copy Files";
runOnlyForDeploymentPostprocessing = 0;
};
DDE360A12D87EA8C004EA223 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
DDE360A02D87EA8C004EA223 /* whisper.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
@ -59,7 +68,34 @@
18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../include/whisper.h; sourceTree = "<group>"; };
18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../src/whisper.cpp; sourceTree = "<group>"; };
18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml/src/ggml.c; sourceTree = "<group>"; };
18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml/include/ggml.h; sourceTree = "<group>"; };
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../../ggml/include/ggml-metal.h"; sourceTree = "<group>"; };
18A275FF2C2A9563001C8D37 /* ggml-common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-common.h"; path = "../../../ggml/src/ggml-common.h"; sourceTree = "<group>"; };
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml/src/ggml-quants.h"; sourceTree = "<group>"; };
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml/include/ggml-backend.h"; sourceTree = "<group>"; };
18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml/src/ggml-backend-impl.h"; sourceTree = "<group>"; };
18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.c"; sourceTree = "<group>"; };
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-threading.h"; path = "../../../ggml/src/ggml-threading.h"; sourceTree = "<group>"; };
18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-threading.cpp"; path = "../../../ggml/src/ggml-threading.cpp"; sourceTree = "<group>"; };
18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.cpp"; sourceTree = "<group>"; };
18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-aarch64.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.h"; sourceTree = "<group>"; };
18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-aarch64.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp"; sourceTree = "<group>"; };
18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-impl.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-impl.h"; sourceTree = "<group>"; };
18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-quants.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.h"; sourceTree = "<group>"; };
18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-quants.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.c"; sourceTree = "<group>"; };
18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-backend-reg.cpp"; path = "../../../ggml/src/ggml-backend-reg.cpp"; sourceTree = "<group>"; };
433188B72D3A187C00E3FE79 /* gguf.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = gguf.cpp; path = ../../../ggml/src/gguf.cpp; sourceTree = "<group>"; };
433188B92D3A18A400E3FE79 /* gguf.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = gguf.h; path = ../../../ggml/include/gguf.h; sourceTree = "<group>"; };
437B63E02D36280C002A49EC /* ggml-cpu-traits.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-traits.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-traits.h"; sourceTree = "<group>"; };
437B63E12D36280C002A49EC /* ggml-cpu-traits.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu-traits.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-traits.cpp"; sourceTree = "<group>"; };
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
@ -67,7 +103,6 @@
7FE342492A0C3FA20015A058 /* whisper-encoder-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder-impl.h"; sourceTree = "<group>"; };
7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-decoder-impl.m"; sourceTree = "<group>"; };
7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = "ggml-base.en-encoder.mlmodelc"; path = "../../../models/ggml-base.en-encoder.mlmodelc"; sourceTree = "<group>"; };
DDE3609E2D87EA8C004EA223 /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../../build-apple/whisper.xcframework"; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -75,7 +110,6 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
DDE3609F2D87EA8C004EA223 /* whisper.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -86,7 +120,6 @@
isa = PBXGroup;
children = (
18627C7829052BDF00BD2A04 /* whisper.objc */,
DDE3609D2D87EA8C004EA223 /* Frameworks */,
18627C7729052BDF00BD2A04 /* Products */,
);
sourceTree = "<group>";
@ -102,9 +135,40 @@
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
isa = PBXGroup;
children = (
433188B92D3A18A400E3FE79 /* gguf.h */,
433188B72D3A187C00E3FE79 /* gguf.cpp */,
18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */,
18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */,
18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.cpp */,
18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */,
437B63E02D36280C002A49EC /* ggml-cpu-traits.h */,
437B63E12D36280C002A49EC /* ggml-cpu-traits.cpp */,
18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */,
18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */,
18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */,
18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */,
18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */,
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
18A275FF2C2A9563001C8D37 /* ggml-common.h */,
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */,
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
18ABE1572AF556340044A204 /* ggml-backend.cpp */,
18ABE1552AF556340044A204 /* ggml-backend.h */,
18ABE1582AF556340044A204 /* ggml-impl.h */,
18ABE1592AF556340044A204 /* ggml-quants.c */,
18ABE1542AF556340044A204 /* ggml-quants.h */,
1844471D2AB2195F007D6BFE /* ggml-metal.metal */,
1844471B2AB21655007D6BFE /* ggml-metal.m */,
184447182AB211A2007D6BFE /* ggml-alloc.c */,
184447192AB211A2007D6BFE /* ggml-alloc.h */,
7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */,
7FE342442A0C3FA20015A058 /* coreml */,
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */,
18627C9729052C6600BD2A04 /* ggml.h */,
18627C9529052C5800BD2A04 /* ggml.c */,
18627C9329052C4900BD2A04 /* whisper.cpp */,
18627C9229052C2B00BD2A04 /* whisper.h */,
18627C7929052BDF00BD2A04 /* AppDelegate.h */,
18627C7A29052BDF00BD2A04 /* AppDelegate.m */,
18627C7C29052BDF00BD2A04 /* SceneDelegate.h */,
@ -134,14 +198,6 @@
path = ../../../src/coreml;
sourceTree = "<group>";
};
DDE3609D2D87EA8C004EA223 /* Frameworks */ = {
isa = PBXGroup;
children = (
DDE3609E2D87EA8C004EA223 /* whisper.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
@ -153,7 +209,6 @@
18627C7329052BDF00BD2A04 /* Frameworks */,
18627C7429052BDF00BD2A04 /* Resources */,
184447202AB21B25007D6BFE /* Copy Files */,
DDE360A12D87EA8C004EA223 /* Embed Frameworks */,
);
buildRules = (
);
@ -201,6 +256,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */,
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */,
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
@ -217,10 +273,25 @@
buildActionMask = 2147483647;
files = (
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */,
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */,
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
437B63E22D36280C002A49EC /* ggml-cpu-traits.cpp in Sources */,
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */,
18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */,
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.cpp in Sources */,
18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */,
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
18627C8C29052BE000BD2A04 /* main.m in Sources */,
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
433188B82D3A187C00E3FE79 /* gguf.cpp in Sources */,
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -298,7 +369,7 @@
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = "";
HEADER_SEARCH_PATHS = ../../../ggml/src/;
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
@ -352,7 +423,7 @@
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = "";
HEADER_SEARCH_PATHS = ../../../ggml/src/;
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
@ -370,13 +441,12 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CLANG_CXX_LANGUAGE_STANDARD = "c++17";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = P8JZH34X63;
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
GENERATE_INFOPLIST_FILE = YES;
HEADER_SEARCH_PATHS = "";
HEADER_SEARCH_PATHS = ../../../ggml/src/;
INFOPLIST_FILE = whisper.objc/Info.plist;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
@ -389,12 +459,10 @@
);
MARKETING_VERSION = 1.0;
MTL_HEADER_SEARCH_PATHS = "";
OTHER_CFLAGS = "-DGGML_USE_CPU=ON";
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
TARGETED_DEVICE_FAMILY = "1,2";
WARNING_CFLAGS = "-Wno-quoted-include-in-framework-header";
};
name = Debug;
};
@ -403,13 +471,12 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CLANG_CXX_LANGUAGE_STANDARD = "c++17";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = P8JZH34X63;
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
GENERATE_INFOPLIST_FILE = YES;
HEADER_SEARCH_PATHS = "";
HEADER_SEARCH_PATHS = ../../../ggml/src/;
INFOPLIST_FILE = whisper.objc/Info.plist;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
@ -422,12 +489,10 @@
);
MARKETING_VERSION = 1.0;
MTL_HEADER_SEARCH_PATHS = "";
OTHER_CFLAGS = "-DGGML_USE_CPU=ON";
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
TARGETED_DEVICE_FAMILY = "1,2";
WARNING_CFLAGS = "-Wno-quoted-include-in-framework-header";
};
name = Release;
};

View File

@ -6,8 +6,8 @@
//
#import "ViewController.h"
#import <whisper/whisper.h>
#import "whisper.h"
#define NUM_BYTES_PER_BUFFER 16*1024
@ -83,19 +83,6 @@ void AudioInputCallback(void * inUserData,
stateInp.n_samples = 0;
stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));
// Set up audio session
NSError *error = nil;
[[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryRecord error:&error];
if (error) {
NSLog(@"Error setting audio session category: %@", error);
}
[[AVAudioSession sharedInstance] setActive:YES error:&error];
if (error) {
NSLog(@"Error activating audio session: %@", error);
}
}
stateInp.isTranscribing = false;

View File

@ -1,29 +1,7 @@
# whisper.cpp/examples/whisper.swiftui
A sample SwiftUI app using [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions.
See also: [whisper.objc](https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.objc).
### Building
First whisper.cpp need to be built and a XCFramework needs to be created. This can be done by running
the following script from the whisper.cpp project root:
```console
$ ./build-xcframework.sh
```
Note: if you get the error "iphoneos is not an iOS SDK" then you probably need to run this command first:
```console
sudo xcode-select -switch /Applications/Xcode.app/Contents/Developer
```
Open `whisper.swiftui.xcodeproj` project in Xcode and you should be able to build and run the app on
a simulator or a real device.
To use the framework with a different project, the XCFramework can be added to the project by
adding `build-apple/whisper.xcframework` by dragging and dropping it into the project navigator, or
by manually selecting the framework in the "Frameworks, Libraries, and Embedded Content" section
of the project settings.
### Usage
**Usage**:
1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1]
2. Add the model to `whisper.swiftui.demo/Resources/models` **via Xcode**.
@ -33,19 +11,6 @@ sudo xcode-select -switch /Applications/Xcode.app/Contents/Developer
**Note:** Pay attention to the folder path: `whisper.swiftui.demo/Resources/models` is the appropriate directory to place resources whilst `whisper.swiftui.demo/Models` is related to actual code.
### Core ML support
1. Follow all the steps in the `Usage` section, including adding the ggml model file.
2. Follow the [`Core ML support` section of readme](../../README.md#core-ml-support) to convert the
model.
3. Add the Core ML model (`models/ggml-base.en-encoder.mlmodelc/`) to `whisper.swiftui.demo/Resources/models` **via Xcode**.
When the example starts running you should now see that it is using the Core ML model:
```console
whisper_init_state: loading Core ML model from '/Library/Developer/CoreSimulator/Devices/25E8C27D-0253-4281-AF17-C3F2A4D1D8F4/data/Containers/Bundle/Application/3ADA7D59-7B9C-43B4-A7E1-A87183FC546A/whisper.swiftui.app/models/ggml-base.en-encoder.mlmodelc'
whisper_init_state: first run on a device may take a while ...
whisper_init_state: Core ML model loaded
```
[^1]: I recommend the tiny, base or small models for running on an iOS device.
[^2]: The `Release` build can boost performance of transcription. In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project.

View File

@ -17,26 +17,11 @@
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
5B3454FF2D8178F80005A3BC /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5B3454FE2D8178F80005A3BC /* whisper.xcframework */; };
5B3455002D8178F80005A3BC /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 5B3454FE2D8178F80005A3BC /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
7F79E0EE2CE0A78000ACD7BF /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */; };
7F79E0F02CE0C6F700ACD7BF /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0EF2CE0C6F700ACD7BF /* Model.swift */; };
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
5B3455012D8178F80005A3BC /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
5B3455002D8178F80005A3BC /* whisper.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
0A8E48FF2954B3F100704C1B /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
0AA751462953AC2E001EE061 /* samples */ = {isa = PBXFileReference; lastKnownFileType = folder; path = samples; sourceTree = "<group>"; };
@ -50,9 +35,9 @@
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
5B3454FE2D8178F80005A3BC /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../../build-apple/whisper.xcframework"; sourceTree = "<group>"; };
7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
7F79E0EF2CE0C6F700ACD7BF /* Model.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -60,7 +45,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
5B3454FF2D8178F80005A3BC /* whisper.xcframework in Frameworks */,
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -97,6 +82,7 @@
0AAC5D8E29539CCF003032C3 = {
isa = PBXGroup;
children = (
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
0A8E48FF2954B3F100704C1B /* README.md */,
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
@ -155,7 +141,6 @@
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
isa = PBXGroup;
children = (
5B3454FE2D8178F80005A3BC /* whisper.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
@ -170,7 +155,6 @@
0AAC5D9329539CCF003032C3 /* Sources */,
0AAC5D9429539CCF003032C3 /* Frameworks */,
0AAC5D9529539CCF003032C3 /* Resources */,
5B3455012D8178F80005A3BC /* Embed Frameworks */,
);
buildRules = (
);
@ -178,6 +162,7 @@
);
name = whisper.swiftui;
packageProductDependencies = (
E3F92DC42AFA8E3800A6A9D4 /* whisper */,
);
productName = WhisperCppDemo;
productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
@ -471,6 +456,13 @@
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
/* Begin XCSwiftPackageProductDependency section */
E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
isa = XCSwiftPackageProductDependency;
productName = whisper;
};
/* End XCSwiftPackageProductDependency section */
};
rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
}

View File

@ -32,9 +32,8 @@ set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE_STRICT=0 \
-s INITIAL_MEMORY=512MB \
-s MAXIMUM_MEMORY=2000MB \
-s ALLOW_MEMORY_GROWTH=1 \
-s INITIAL_MEMORY=2000MB \
-s TOTAL_MEMORY=2000MB \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
${EXTRA_FLAGS} \

View File

@ -22,7 +22,7 @@ audio is limited to 120 seconds.
## Live demo
Link: https://ggerganov.github.io/whisper.cpp/
Link: https://whisper.ggerganov.com
![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png)
@ -35,17 +35,7 @@ cd whisper.cpp
mkdir build-em && cd build-em
emcmake cmake ..
make -j
```
The example can then be started by running a local HTTP server:
```console
python3 examples/server.py
```
And then opening a browser to the following URL:
http://localhost:8000/whisper.wasm
To run the example in a different server, you need to copy the following files
to the server's HTTP path:
```
# copy the produced page to your HTTP path
cp bin/whisper.wasm/* /path/to/html/
cp bin/libmain.worker.js /path/to/html/

View File

@ -24,8 +24,6 @@
overflow-x: scroll;
}
</style>
<script src="coi-serviceworker.js"></script>
<link rel="icon" href="data:,">
</head>
<body>
<div id="main-container">
@ -49,9 +47,11 @@
</ul>
<b>More examples:</b>
<a href="bench.wasm/">bench</a> |
<a href="stream.wasm">stream</a> |
<a href="command.wasm/">command</a> |
<a href="https://whisper.ggerganov.com/">main</a> |
<a href="https://whisper.ggerganov.com/bench">bench</a> |
<a href="https://whisper.ggerganov.com/stream">stream</a> |
<a href="https://whisper.ggerganov.com/command">command</a> |
<a href="https://whisper.ggerganov.com/talk">talk</a> |
<hr>
@ -614,7 +614,7 @@
var nthreads = 8;
function changeThreads(value) {
nthreads = parseInt(value, 10);
nthreads = value;
document.getElementById('threads-value').innerHTML = nthreads;
}

View File

@ -100,17 +100,11 @@ else()
set(INS_ENB ON)
endif()
message(DEBUG "GGML_NATIVE : ${GGML_NATIVE}")
message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
message(DEBUG "INS_ENB : ${INS_ENB}")
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
option(GGML_AVX512 "ggml: enable AVX512F" OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
@ -127,12 +121,9 @@ endif()
option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX "ggml: enable lsx" ON)
option(GGML_RVV "ggml: enable rvv" ON)
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
option(GGML_VXE "ggml: enable vxe" ON)
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
if (WIN32)
@ -159,17 +150,12 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
"ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
"ggml: cuda link binary compression mode; requires cuda 12.8+")
set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")
option(GGML_HIP "ggml: use HIP" OFF)
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
@ -192,7 +178,6 @@ option(GGML_OPENMP "ggml: use OpenMP"
option(GGML_RPC "ggml: use RPC" OFF)
option(GGML_SYCL "ggml: use SYCL" OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
"ggml: sycl target device")
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
@ -202,8 +187,6 @@ option(GGML_OPENCL "ggml: use OpenCL"
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
"gmml: OpenCL API version to target")
# toolchain for vulkan-shaders-gen
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@ -226,8 +209,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include(GNUInstallDirs)
#
# build the library
#
@ -251,6 +232,7 @@ endif ()
# install
#
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
# all public headers
@ -261,7 +243,6 @@ set(GGML_PUBLIC_HEADERS
include/ggml-backend.h
include/ggml-blas.h
include/ggml-cann.h
include/ggml-cpp.h
include/ggml-cuda.h
include/ggml-kompute.h
include/ggml-opt.h

View File

@ -1,26 +0,0 @@
function(ggml_get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")
if (CCID MATCHES "Clang")
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
if (
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
list(APPEND CXX_FLAGS -Wextra-semi)
endif()
endif()
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()

View File

@ -5,7 +5,7 @@
set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
#set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
find_package(Threads REQUIRED)
@ -112,7 +112,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
if(is_cpu_variant)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
@ -124,7 +124,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
endif()
else()
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
@ -139,11 +139,6 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
endforeach()
list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
set_target_properties(ggml::ggml
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_INTERFACE_LINK_LIBRARIES}")
add_library(ggml::all INTERFACE IMPORTED)
set_target_properties(ggml::all
PROPERTIES

View File

@ -19,7 +19,7 @@ struct ggml_tallocr {
};
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
// Graph allocator
/*

View File

@ -56,7 +56,7 @@ extern "C" {
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@ -342,8 +342,8 @@ extern "C" {
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
// Tensor initialization
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
// CPU buffer types are always available
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);

View File

@ -8,7 +8,7 @@ extern "C" {
#endif
// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggml-org/ggml/issues/287
// since https://github.com/ggerganov/ggml/issues/287
struct ggml_cplan {
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
@ -80,7 +80,6 @@ extern "C" {
GGML_BACKEND_API int ggml_cpu_has_avx (void);
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
GGML_BACKEND_API int ggml_cpu_has_fma (void);
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
@ -96,11 +95,9 @@ extern "C" {
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
GGML_BACKEND_API int ggml_cpu_has_sve (void);
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
GGML_BACKEND_API int ggml_cpu_has_sme (void);
// other
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);

View File

@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
GGML_DEPRECATED(
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
"obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
"obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);

View File

@ -17,9 +17,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
const char * cache_dir,
size_t free_mem, size_t total_mem);
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);

View File

@ -10,6 +10,8 @@ extern "C" {
#define GGML_VK_NAME "Vulkan"
#define GGML_VK_MAX_DEVICES 16
GGML_BACKEND_API void ggml_vk_instance_init(void);
// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);

View File

@ -198,7 +198,7 @@
#ifndef __GNUC__
# define GGML_ATTRIBUTE_FORMAT(...)
#elif defined(__MINGW32__) && !defined(__clang__)
#elif defined(__MINGW32__)
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#else
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@ -454,7 +454,6 @@ extern "C" {
GGML_OP_RMS_NORM,
GGML_OP_RMS_NORM_BACK,
GGML_OP_GROUP_NORM,
GGML_OP_L2_NORM,
GGML_OP_MUL_MAT,
GGML_OP_MUL_MAT_ID,
@ -503,7 +502,6 @@ extern "C" {
GGML_OP_ADD_REL_POS,
GGML_OP_RWKV_WKV6,
GGML_OP_GATED_LINEAR_ATTN,
GGML_OP_RWKV_WKV7,
GGML_OP_UNARY,
@ -1097,18 +1095,6 @@ extern "C" {
int n_groups,
float eps);
// l2 normalize along rows
// used in rwkv v7
GGML_API struct ggml_tensor * ggml_l2_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
float eps);
GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float eps);
// a - x
// b - dy
GGML_API struct ggml_tensor * ggml_rms_norm_back(
@ -1791,11 +1777,11 @@ extern "C" {
#define GGML_KQ_MASK_PAD 64
// q: [n_embd_k, n_batch, n_head, 1]
// k: [n_embd_k, n_kv, n_head_kv, 1]
// v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
// mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
// res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
// q: [n_embd, n_batch, n_head, 1]
// k: [n_embd, n_kv, n_head_kv, 1]
// v: [n_embd, n_kv, n_head_kv, 1] !! not transposed !!
// mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
// res: [n_embd, n_head, n_batch, 1] !! permuted !!
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
struct ggml_context * ctx,
struct ggml_tensor * q,
@ -1904,16 +1890,6 @@ extern "C" {
struct ggml_tensor * state,
float scale);
GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
struct ggml_context * ctx,
struct ggml_tensor * r,
struct ggml_tensor * w,
struct ggml_tensor * k,
struct ggml_tensor * v,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * state);
// custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@ -2164,11 +2140,7 @@ extern "C" {
# define GGML_RESTRICT
# endif
#else
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
# define GGML_RESTRICT __restrict
# else
# define GGML_RESTRICT restrict
# endif
# define GGML_RESTRICT restrict
#endif
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);

View File

@ -1,5 +1,4 @@
include(CheckCXXCompilerFlag)
include("../cmake/common.cmake")
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
@ -25,6 +24,33 @@ if (NOT MSVC)
endif()
endif()
function(ggml_get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")
if (CCID MATCHES "Clang")
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
if (
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
list(APPEND CXX_FLAGS -Wextra-semi)
endif()
endif()
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()
if (GGML_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
list(APPEND C_FLAGS -Werror)
@ -65,7 +91,7 @@ if (GGML_LTO)
endif()
endif()
if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
if (GGML_CCACHE)
find_program(GGML_CCACHE_FOUND ccache)
find_program(GGML_SCCACHE_FOUND sccache)
@ -76,11 +102,7 @@ if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAU
set(GGML_CCACHE_VARIANT sccache)
endif()
# TODO: should not be set globally
if (GGML_SYCL AND GGML_CCACHE_FOUND AND WIN32)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache compiler_type=icl")
else ()
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
endif ()
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
set(ENV{CCACHE_SLOPPINESS} time_macros)
message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
else()
@ -204,9 +226,6 @@ add_library(ggml-base
gguf.cpp)
target_include_directories(ggml-base PRIVATE .)
if (GGML_BACKEND_DL)
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
endif()
add_library(ggml
ggml-backend-reg.cpp)
@ -214,7 +233,7 @@ add_library(ggml
target_link_libraries(ggml PUBLIC ggml-base)
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(ggml PRIVATE dl stdc++fs)
target_link_libraries(ggml PRIVATE dl)
endif()
function(ggml_add_backend_library backend)
@ -267,7 +286,7 @@ function(ggml_add_cpu_backend_variant tag_name)
set(GGML_CPU_TAG_NAME ${tag_name})
# other: OPENMP LLAMAFILE CPU_HBM
foreach (feat NATIVE
AVX AVX2 BMI2 AVX_VNNI FMA F16C
AVX AVX2 AVX_VNNI FMA F16C
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
AMX_TILE AMX_INT8 AMX_BF16)
set(GGML_${feat} OFF)
@ -287,13 +306,13 @@ if (GGML_CPU_ALL_VARIANTS)
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
endif()
ggml_add_cpu_backend_variant(sandybridge AVX)
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
if (NOT MSVC)
# MSVC doesn't support AMX
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
endif()
elseif (GGML_CPU)
ggml_add_cpu_backend_variant_impl("")
@ -329,10 +348,6 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
target_link_libraries(ggml-base PRIVATE dl)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
endif()
if (BUILD_SHARED_LIBS)
foreach (target ggml-base ggml)
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)

View File

@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
return talloc;
}
enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
size = GGML_PAD(size, talloc->alignment);
@ -104,7 +104,7 @@ enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_te
assert(((uintptr_t)addr % talloc->alignment) == 0);
return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
}
// dynamic tensor allocator
@ -933,51 +933,42 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
// utils
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
for (size_t i = 0; i < *n_buffers; i++) {
ggml_backend_buffer_free((*buffers)[i]);
}
free(*buffers);
}
static bool alloc_tensor_range(struct ggml_context * ctx,
struct ggml_tensor * first, struct ggml_tensor * last,
ggml_backend_buffer_type_t buft, size_t size,
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
if (buffer == NULL) {
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
free_buffers(buffers, n_buffers);
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
#endif
for (size_t i = 0; i < *n_buffers; i++) {
ggml_backend_buffer_free((*buffers)[i]);
}
free(*buffers);
return false;
}
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer;
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
enum ggml_status status = GGML_STATUS_SUCCESS;
if (t->data == NULL) {
if (t->view_src == NULL) {
status = ggml_tallocr_alloc(&tallocr, t);
ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
status = ggml_backend_view_init(t);
ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
status = ggml_backend_view_init(t);
ggml_backend_view_init(t);
}
}
if (status != GGML_STATUS_SUCCESS) {
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
free_buffers(buffers, n_buffers);
return false;
}
}
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer;
return true;
}
@ -998,7 +989,19 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
}
if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
if (this_size > max_size) {
GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
__func__, t->name,
ggml_backend_buft_name(buft),
this_size, max_size);
for (size_t i = 0; i < n_buffers; i++) {
ggml_backend_buffer_free(buffers[i]);
}
free(buffers);
return NULL;
}
if ((cur_buf_size + this_size) > max_size) {
// allocate tensors in the current buffer
if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
return NULL;

View File

@ -44,7 +44,7 @@ extern "C" {
// base address of the buffer
void * (*get_base) (ggml_backend_buffer_t buffer);
// (optional) initialize a tensor in the buffer (eg. add tensor extras)
enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
// tensor data access
void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

Some files were not shown because too many files have changed in this diff Show More