mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-07-07 09:52:00 +02:00
Compare commits
351 Commits
Author | SHA1 | Date | |
---|---|---|---|
51c6961c7b | |||
503a786c9a | |||
ad4e350933 | |||
d7a9346ab1 | |||
b63d23f728 | |||
f6ce10e4a1 | |||
6cb2b86581 | |||
801d6bd809 | |||
ddf7e6a15d | |||
0d42097fd3 | |||
842b9c984c | |||
0810f02547 | |||
8c13c78f9d | |||
f31b404fcb | |||
854c0518bc | |||
c8e3968edd | |||
b358de2458 | |||
11688b262f | |||
04b9508fb3 | |||
4200430e75 | |||
e153b8eaa2 | |||
83af237f0b | |||
7a2e39750a | |||
0a40ae9728 | |||
32cfdcbf42 | |||
cfa42aca09 | |||
2e2f0f954b | |||
93631b2be6 | |||
f9015b585b | |||
1880ffd7ff | |||
9173932c78 | |||
94c3f3877f | |||
00086469fb | |||
2d8e40e2a0 | |||
e17af6524f | |||
88d13a17a7 | |||
f92bd59951 | |||
6e7629b146 | |||
27533e7f63 | |||
1b81415963 | |||
0001ec075f | |||
5bad2e5099 | |||
6fc0ae2f5a | |||
de6b38c6d9 | |||
46d6e0abc1 | |||
1279f0d0bc | |||
f28bf5d186 | |||
1fbdfb1d36 | |||
ee5581633b | |||
8ca67df291 | |||
fc6d343e76 | |||
3199356d3a | |||
e0c43b0bbf | |||
f4f619ea8e | |||
3c4d363872 | |||
15aa189329 | |||
c53d5c9e85 | |||
ba6f584f30 | |||
a219941812 | |||
a2cc8c2666 | |||
388ed98220 | |||
d487a28ae1 | |||
cbb88c4050 | |||
13455c0b5f | |||
2f77a9e9bd | |||
fa2b5249ff | |||
5b854ebba5 | |||
8058f19d0b | |||
ae6a9bb9a5 | |||
24faba9e9b | |||
c722ff84d3 | |||
102af79f63 | |||
03c364557d | |||
31b62276cf | |||
97b5a3055d | |||
9993c3f703 | |||
fa72479cfb | |||
6c15539c54 | |||
52c4c03b0a | |||
cfc2560e41 | |||
db6e8056b5 | |||
b3f3779c1b | |||
13eeebb1b2 | |||
905b834af1 | |||
2cd3061a23 | |||
88d59e21b2 | |||
4917f122d4 | |||
16a1b77249 | |||
51d1398a0a | |||
3499dd83c0 | |||
7b7d9ae35e | |||
2dcb7181ff | |||
96ab3b2465 | |||
08f32992d0 | |||
394fae57c3 | |||
0708835301 | |||
774c519433 | |||
776cdceb9e | |||
03d050481e | |||
3d60219622 | |||
521d72d76e | |||
9fb9025a40 | |||
3c2abb01e8 | |||
efd9407e22 | |||
3684af2594 | |||
206459a804 | |||
21d890d534 | |||
0b43a02be8 | |||
2699e1485a | |||
594a121f3e | |||
996581c5e2 | |||
226d344f56 | |||
bb9f68129f | |||
30cf30ca82 | |||
ee6286c35d | |||
c7941d5ccc | |||
b82ac32a6c | |||
edf1ee1ef8 | |||
cf5ddb8c21 | |||
7fe4979f25 | |||
9bc0dc7235 | |||
3fc6ad97a3 | |||
663cafc1e8 | |||
be9de81171 | |||
21fb513ef1 | |||
4e56747944 | |||
ca75449a92 | |||
80dad86b2c | |||
485ece6725 | |||
e7d9d8687a | |||
6e8242f7fe | |||
e27fd6f0c0 | |||
96db0c5a9c | |||
d2aaffd5d9 | |||
215990abde | |||
7e23d8c64a | |||
740bf7f6a1 | |||
c8e12f59dd | |||
83b14c357c | |||
60b481d881 | |||
4854789751 | |||
e0f3c9d4dd | |||
1f4886b40d | |||
f11de0e73c | |||
d5cc27ee4d | |||
5bb1d58c6a | |||
7d14005717 | |||
4ffb8e3e4d | |||
1d8d8ae55e | |||
eebf6bc0bd | |||
dc8f423b40 | |||
548e7052f1 | |||
a34cb73dc2 | |||
82f9496657 | |||
e3c85e75bd | |||
b9eab73fa2 | |||
76385c8311 | |||
442cd1d2e7 | |||
bc8cb97e02 | |||
8dcadf736b | |||
93986b61e0 | |||
bd1a9e34c9 | |||
cc03608e78 | |||
54a54faee4 | |||
96a92ecc4c | |||
edd1d8686a | |||
dc6f4e7c05 | |||
74c85d154e | |||
eb2d8b6ffd | |||
b442dcd598 | |||
c98681e6d5 | |||
3bab804981 | |||
c927830a70 | |||
992b51b3d5 | |||
2c882cbe4c | |||
1fbb119b1e | |||
40dea850fd | |||
8255a830a8 | |||
a0f76b2da7 | |||
394768c48b | |||
846e01b2c0 | |||
6ac8e6b2ce | |||
60d2ddebdf | |||
2e180184a8 | |||
ef40950c4a | |||
c774eec709 | |||
5b481a27a6 | |||
fc7b1ee521 | |||
c42f67e2d2 | |||
339a1cba5d | |||
c64f3e8ada | |||
9f83f67221 | |||
7d3da68f79 | |||
b5d21359c1 | |||
17addf7104 | |||
cdaee8b4bd | |||
4b60ff4f92 | |||
b43b9d928c | |||
e3cb412a59 | |||
ac301a7d9b | |||
82e04e7670 | |||
38ac47cd4d | |||
2d70cd36d7 | |||
98dab49b9a | |||
b1385e9aa9 | |||
48f5e893f5 | |||
dc21871fcb | |||
64a430bc81 | |||
51a3580c79 | |||
37a21dd43d | |||
8a22a8b17f | |||
fcbcad0c90 | |||
4444db7360 | |||
a7fc1038ca | |||
1689aaf854 | |||
4b48fe449a | |||
47cc043e69 | |||
e3d9ffb98b | |||
e22d69839d | |||
defe731263 | |||
4e07957bf9 | |||
d2c5154bb5 | |||
4fac43fe00 | |||
3be9670f17 | |||
86729fcd6d | |||
7fbca6304e | |||
d597f83e1a | |||
e5edcc6259 | |||
556f773d53 | |||
91d02de332 | |||
1b67d72f87 | |||
14d7c0368d | |||
db6e19188a | |||
b4b063a5c9 | |||
930b739e7a | |||
5981352bb5 | |||
7561da244e | |||
be83f342fb | |||
fd369871f7 | |||
bbd8364f5e | |||
e4102440ef | |||
f8242ec483 | |||
ef51b4cba4 | |||
6f08b24146 | |||
7c165d7fa8 | |||
2f0cf44915 | |||
b9c972fd0d | |||
01c9aafbfd | |||
bae6bbf487 | |||
c310272fa0 | |||
bd0b55dbe0 | |||
ba4645db2c | |||
dfc6ca62f3 | |||
47e14c0529 | |||
d682e15090 | |||
46d07b9c85 | |||
33ea03f131 | |||
dbcc669e1a | |||
16245b35e4 | |||
898c0cb9d1 | |||
eb9e5032c4 | |||
cadfc50eab | |||
3f91832352 | |||
cff8868b5f | |||
90e3c5fc40 | |||
e0f4cef867 | |||
234460987e | |||
b8ab126343 | |||
edc5d9267c | |||
344b98a44f | |||
dbeb7916b8 | |||
fad2806352 | |||
9906792ec3 | |||
c49ee07ff4 | |||
f8a831779e | |||
85451e3612 | |||
43c744ce8b | |||
fc2e44490d | |||
f41fdad200 | |||
80fa576254 | |||
75e7d0585e | |||
682a6f5f87 | |||
115716d109 | |||
b2cfef655b | |||
22e3df0afa | |||
028511d349 | |||
70c4038842 | |||
8639c003a9 | |||
d5d831da65 | |||
7230a6e1c8 | |||
a160fa0f3a | |||
0282ad8fd1 | |||
9e467815d4 | |||
727891d9bf | |||
c262dc80e2 | |||
30767b4c4e | |||
16eeb31933 | |||
ba523d5e22 | |||
3736706139 | |||
58640aa456 | |||
5183a05e56 | |||
0dcada42d4 | |||
d507b4cebe | |||
90171055f3 | |||
668306ff2b | |||
fdc21fc87b | |||
7183a1eb72 | |||
09f3c66648 | |||
62e2414620 | |||
de49024e49 | |||
db6383094c | |||
164f13c6a9 | |||
02aa86230a | |||
54a2ee648f | |||
9700cfb0a3 | |||
8e0143e205 | |||
f12559d590 | |||
589b40810a | |||
7ffcd05267 | |||
7a423f1c00 | |||
99b011a9f5 | |||
19d95f9f9a | |||
d5ef1737d8 | |||
1deb41f0e7 | |||
2425caf4fd | |||
a4b00bcaaf | |||
cdb8aa2f2e | |||
06209f6683 | |||
c3235bd81e | |||
262d0abc87 | |||
124eec1664 | |||
b08c3a88c8 | |||
0afce25a69 | |||
acdbe58631 | |||
09fabffdf5 | |||
3988d6396b | |||
c8c63eeec0 | |||
abf7f24410 | |||
341f5c28e6 | |||
5377099524 | |||
dcbb375779 | |||
4334c71aed | |||
e875a82473 | |||
507e230f1e | |||
eb68324c86 | |||
e940fbf283 | |||
35d0e02c72 | |||
45d3faf961 | |||
2ab2eb5110 | |||
b82d305282 | |||
885e31368d |
@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
||||
ARG CUDA_DOCKER_ARCH=all
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git cmake libsdl2-dev wget
|
||||
apt-get install -y build-essential git cmake libsdl2-dev wget git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
@ -17,7 +17,7 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
ENV GGML_CUDA=1
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential libsdl2-dev wget cmake \
|
||||
apt-get install -y build-essential libsdl2-dev wget cmake git \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
# Ref: https://stackoverflow.com/a/53464012
|
||||
@ -33,7 +33,7 @@ ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl ffmpeg wget cmake \
|
||||
apt-get install -y curl ffmpeg wget cmake git \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
COPY --from=build /app /app
|
||||
|
@ -2,7 +2,7 @@ FROM ubuntu:22.04 AS build
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential wget cmake \
|
||||
apt-get install -y build-essential wget cmake git \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
COPY .. .
|
||||
@ -12,7 +12,7 @@ FROM ubuntu:22.04 AS runtime
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl ffmpeg libsdl2-dev wget cmake \
|
||||
apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
COPY --from=build /app /app
|
||||
|
4
.github/workflows/bindings-go.yml
vendored
4
.github/workflows/bindings-go.yml
vendored
@ -10,8 +10,8 @@ on:
|
||||
- whisper.h
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
|
18
.github/workflows/bindings-ruby.yml
vendored
18
.github/workflows/bindings-ruby.yml
vendored
@ -19,7 +19,12 @@ on:
|
||||
- ggml/**/*.m
|
||||
- ggml/**/*.metal
|
||||
- scripts/get-flags.mk
|
||||
- examples/dr_wav.h
|
||||
- examples/common.h
|
||||
- examples/common.cpp
|
||||
- examples/common-whisper.h
|
||||
- examples/common-whisper.cpp
|
||||
- examples/stb_vorbis.c
|
||||
- examples/miniaudio.h
|
||||
pull_request:
|
||||
paths:
|
||||
- bindings/ruby/**
|
||||
@ -39,11 +44,16 @@ on:
|
||||
- ggml/**/*.m
|
||||
- ggml/**/*.metal
|
||||
- scripts/get-flags.mk
|
||||
- examples/dr_wav.h
|
||||
- examples/common.h
|
||||
- examples/common.cpp
|
||||
- examples/common-whisper.h
|
||||
- examples/common-whisper.cpp
|
||||
- examples/stb_vorbis.c
|
||||
- examples/miniaudio.h
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22:
|
||||
runs-on: ubuntu-22.04
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bindings/ruby
|
||||
|
550
.github/workflows/build.yml
vendored
550
.github/workflows/build.yml
vendored
@ -6,18 +6,82 @@ on:
|
||||
- master
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
create_release:
|
||||
description: 'Create new release'
|
||||
required: true
|
||||
type: boolean
|
||||
pre_release_tag:
|
||||
description: 'Pre-release tag name'
|
||||
required: false
|
||||
type: string
|
||||
run_type:
|
||||
description: 'Workflow type to run'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- full-ci
|
||||
- release-only
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: write # for creating release
|
||||
|
||||
env:
|
||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
ubuntu_image: "ubuntu:22.04"
|
||||
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
determine-tag:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
tag_name: ${{ steps.tag.outputs.name }}
|
||||
|
||||
steps:
|
||||
- name: Checkout with full history
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
shell: bash
|
||||
run: |
|
||||
BUILD_NUMBER=$(git rev-list --count HEAD)
|
||||
SHORT_HASH=$(git rev-parse --short=7 HEAD)
|
||||
CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}"
|
||||
|
||||
echo "Raw values:"
|
||||
echo "BUILD_NUMBER: $BUILD_NUMBER"
|
||||
echo "SHORT_HASH: $SHORT_HASH"
|
||||
echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
|
||||
echo "CUSTOM_TAG: $CUSTOM_TAG"
|
||||
|
||||
# Use custom tag if provided
|
||||
if [[ -n "$CUSTOM_TAG" ]]; then
|
||||
echo "Using custom tag"
|
||||
TAG_NAME="${CUSTOM_TAG}"
|
||||
elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
||||
echo "Using master branch format"
|
||||
TAG_NAME="b${BUILD_NUMBER}"
|
||||
else
|
||||
echo "Using non-master branch format"
|
||||
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
||||
TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
|
||||
fi
|
||||
|
||||
echo "Final tag name: $TAG_NAME"
|
||||
echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
|
||||
|
||||
ubuntu-22:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -38,12 +102,14 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential libsdl2-dev cmake
|
||||
apt install -y build-essential libsdl2-dev cmake git
|
||||
cmake -B build
|
||||
cmake --build build --config Release -j $(nproc)'
|
||||
|
||||
ubuntu-latest-arm64:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22-arm64:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -64,12 +130,14 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential libsdl2-dev cmake
|
||||
apt install -y build-essential libsdl2-dev cmake git
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
|
||||
cmake --build build --config Release -j $(nproc)'
|
||||
|
||||
ubuntu-latest-arm-v7:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22-arm-v7:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -90,17 +158,30 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential libsdl2-dev cmake
|
||||
apt install -y build-essential libsdl2-dev cmake git
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
|
||||
cmake --build build --config Release -j $(nproc)'
|
||||
|
||||
macOS-latest:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: macOS-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: ccache
|
||||
uses: hendrikmuhs/ccache-action@v1.2.16
|
||||
with:
|
||||
key: macOS-latest-swift
|
||||
evict-old-files: 1d
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew update
|
||||
@ -108,29 +189,39 @@ jobs:
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
sysctl -a
|
||||
cmake -B build -G Xcode \
|
||||
-DGGML_METAL_USE_BF16=ON \
|
||||
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||
-DWHISPER_BUILD_EXAMPLES=OFF \
|
||||
-DWHISPER_BUILD_TESTS=OFF \
|
||||
-DWHISPER_BUILD_SERVER=OFF \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
|
||||
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
||||
|
||||
# freeBSD-latest:
|
||||
# runs-on: macos-12
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Build
|
||||
# uses: cross-platform-actions/action@v0.24.0
|
||||
# with:
|
||||
# operating_system: freebsd
|
||||
# version: '13.3'
|
||||
# run: |
|
||||
# sudo pkg update
|
||||
# sudo pkg install -y gmake sdl2 cmake
|
||||
# cmake -B build
|
||||
# cmake --build build --config Release
|
||||
|
||||
ubuntu-latest-gcc:
|
||||
runs-on: ubuntu-latest
|
||||
freeBSD-latest:
|
||||
runs-on: macos-13
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build
|
||||
uses: cross-platform-actions/action@v0.27.0
|
||||
with:
|
||||
operating_system: freebsd
|
||||
version: '14.2'
|
||||
run: |
|
||||
sudo pkg update
|
||||
sudo pkg install -y gmake sdl2 cmake git
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
|
||||
ubuntu-22-gcc:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -152,13 +243,15 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential cmake libsdl2-dev
|
||||
apt install -y build-essential cmake libsdl2-dev git
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
ubuntu-latest-gcc-arm64:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22-gcc-arm64:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -180,13 +273,15 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential cmake libsdl2-dev
|
||||
apt install -y build-essential cmake libsdl2-dev git
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
ubuntu-latest-gcc-arm-v7:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22-gcc-arm-v7:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -208,13 +303,15 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential cmake libsdl2-dev
|
||||
apt install -y build-essential cmake libsdl2-dev git
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
ubuntu-latest-clang:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22-clang:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -239,13 +336,15 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y clang build-essential cmake libsdl2-dev
|
||||
apt install -y clang build-essential cmake libsdl2-dev git
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
ubuntu-latest-gcc-sanitized:
|
||||
runs-on: ubuntu-latest
|
||||
ubuntu-22-gcc-sanitized:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -267,12 +366,16 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential cmake
|
||||
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
|
||||
apt install -y build-essential cmake git
|
||||
cmake . -DCMAKE_BUILD_TYPE=Debug \
|
||||
-DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||
-DGGML_OPENMP=OFF
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
ubuntu-22-cmake-sycl:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
@ -302,12 +405,12 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
||||
sudo apt install intel-oneapi-compiler-dpcpp-cpp git
|
||||
|
||||
- name: install oneAPI MKL library
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt install intel-oneapi-mkl-devel
|
||||
sudo apt install intel-oneapi-mkl-devel git
|
||||
|
||||
- name: Clone
|
||||
id: checkout
|
||||
@ -323,6 +426,8 @@ jobs:
|
||||
cmake --build . --config Release -j $(nproc)
|
||||
|
||||
ubuntu-22-cmake-sycl-fp16:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
@ -352,7 +457,7 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
||||
sudo apt install intel-oneapi-compiler-dpcpp-cpp git
|
||||
|
||||
- name: install oneAPI MKL library
|
||||
shell: bash
|
||||
@ -373,6 +478,8 @@ jobs:
|
||||
cmake --build . --config Release -j $(nproc)
|
||||
|
||||
windows-msys2:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
@ -393,6 +500,7 @@ jobs:
|
||||
msystem: ${{matrix.sys}}
|
||||
install: >-
|
||||
base-devel
|
||||
git
|
||||
mingw-w64-${{matrix.env}}-toolchain
|
||||
mingw-w64-${{matrix.env}}-cmake
|
||||
mingw-w64-${{matrix.env}}-SDL2
|
||||
@ -416,6 +524,8 @@ jobs:
|
||||
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||
|
||||
windows:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
@ -476,6 +586,8 @@ jobs:
|
||||
path: build/bin/${{ matrix.build }}
|
||||
|
||||
windows-blas:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
@ -549,6 +661,8 @@ jobs:
|
||||
path: build/bin/${{ matrix.build }}
|
||||
|
||||
windows-cublas:
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: windows-2019
|
||||
strategy:
|
||||
matrix:
|
||||
@ -565,15 +679,134 @@ jobs:
|
||||
- name: Clone repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Ninja
|
||||
id: install_ninja
|
||||
run: |
|
||||
choco install ninja
|
||||
|
||||
- name: Install ccache
|
||||
uses: hendrikmuhs/ccache-action@v1.2.16
|
||||
with:
|
||||
key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }}
|
||||
variant: sccache
|
||||
evict-old-files: 5d
|
||||
|
||||
- name: Install Cuda Toolkit 11.8.0
|
||||
if: ${{ matrix.cuda-toolkit == '11.8.0' }}
|
||||
run: |
|
||||
$CUDA_VERSION = ${{ matrix.cuda-toolkit }}
|
||||
$CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
|
||||
$CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
|
||||
|
||||
# Components versions
|
||||
$CUDART_VER = "11.8.89"
|
||||
$NVCC_VER = "11.8.89"
|
||||
$NVRTC_VER = "11.8.89"
|
||||
$CUBLAS_VER = "11.8.1.74"
|
||||
$NVTX_VER = "11.8.86"
|
||||
$VS_VER = "11.8.86"
|
||||
$NVPROF_VER = "11.8.87"
|
||||
$CCCL_VER = "11.8.89"
|
||||
|
||||
# Create the directory where the CUDA Toolkit will be installed
|
||||
mkdir -p $CUDA_TOOLKIT_DIR
|
||||
|
||||
# Install unzip to extract the downloaded files
|
||||
choco install unzip -y
|
||||
|
||||
# Download all the required components
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
|
||||
|
||||
# Extract all the downloaded files to the CUDA Toolkit directory
|
||||
unzip '*.zip' -d $CUDA_TOOLKIT_DIR
|
||||
|
||||
# Copy all the extracted files to the main CUDA Toolkit directory
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
|
||||
# Visual Studio integration
|
||||
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y
|
||||
|
||||
# Set environment variables
|
||||
echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
||||
echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
||||
|
||||
- name: Install Cuda Toolkit 12.2.0
|
||||
if: ${{ matrix.cuda-toolkit == '12.2.0' }}
|
||||
run: |
|
||||
$CUDA_VERSION = ${{ matrix.cuda-toolkit }}
|
||||
$CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
|
||||
$CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
|
||||
|
||||
# Components versions
|
||||
$CUDART_VER = "12.2.140"
|
||||
$NVCC_VER = "12.2.140"
|
||||
$NVRTC_VER = "12.2.140"
|
||||
$CUBLAS_VER = "12.2.5.6"
|
||||
$NVTX_VER = "12.2.140"
|
||||
$PROFILER_VER = "12.2.140"
|
||||
$VS_VER = "12.2.140"
|
||||
$NVPROF_VER = "12.2.142"
|
||||
$CCCL_VER = "12.2.140"
|
||||
|
||||
# Create the directory where the CUDA Toolkit will be installed
|
||||
mkdir -p $CUDA_TOOLKIT_DIR
|
||||
|
||||
# Install unzip to extract the downloaded files
|
||||
choco install unzip -y
|
||||
|
||||
# Download all the required components
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
|
||||
curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
|
||||
|
||||
# Extract all the downloaded files to the CUDA Toolkit directory
|
||||
unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR
|
||||
|
||||
# Copy all the extracted files to the main CUDA Toolkit directory
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
|
||||
|
||||
# Visual Studio integration
|
||||
xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y
|
||||
|
||||
# Set environment variables
|
||||
echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
||||
echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
||||
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v2
|
||||
|
||||
- name: Install CUDA Toolkit
|
||||
id: cuda-toolkit
|
||||
uses: Jimver/cuda-toolkit@v0.2.15
|
||||
with:
|
||||
cuda: '${{ matrix.cuda-toolkit }}'
|
||||
|
||||
- name: Install 7-Zip
|
||||
run: choco install 7zip -y
|
||||
|
||||
@ -584,26 +817,31 @@ jobs:
|
||||
7z x sdl2.zip
|
||||
echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||
echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
|
||||
|
||||
- name: Configure CMake
|
||||
shell: cmd
|
||||
run: |
|
||||
cmake -S . -B ./build -A ${{ matrix.arch }} ^
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
|
||||
-DGGML_CUDA=${{ matrix.cublas }} ^
|
||||
-DCMAKE_CUDA_ARCHITECTURES=all ^
|
||||
-DWHISPER_SDL2=${{ matrix.sdl2 }} ^
|
||||
-DSDL2_DIR="%SDL2_DIR%"
|
||||
|
||||
|
||||
- name: Install cmake
|
||||
run: choco install cmake
|
||||
|
||||
- name: Build Project
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ./build
|
||||
cmake --build . --config ${{ matrix.build }}
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake --version
|
||||
where cmake
|
||||
cmake -S . -B build -G "Ninja Multi-Config" ^
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
|
||||
-DGGML_CUDA=${{ matrix.cublas }} ^
|
||||
-DWHISPER_SDL2=${{ matrix.sdl2 }} ^
|
||||
-DSDL2_DIR="%SDL2_DIR%"
|
||||
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
||||
cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS%
|
||||
|
||||
- name: Check sccache status after build
|
||||
run: |
|
||||
sccache --show-stats
|
||||
|
||||
- name: Copy CUDA DLLs
|
||||
run: |
|
||||
Get-ChildItem "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/" -Filter "*.dll" |
|
||||
Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" |
|
||||
Copy-Item -Destination "build/bin/${{ matrix.build }}"
|
||||
|
||||
- name: Copy SDL2.dll
|
||||
@ -617,7 +855,9 @@ jobs:
|
||||
path: build/bin/${{ matrix.build }}
|
||||
|
||||
emscripten:
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
@ -640,6 +880,7 @@ jobs:
|
||||
|
||||
ios-xcode-build:
|
||||
runs-on: macos-latest
|
||||
needs: determine-tag
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
@ -670,21 +911,39 @@ jobs:
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||
sudo cmake --install . --config Release
|
||||
|
||||
- name: xcodebuild for swift package
|
||||
id: xcodebuild
|
||||
run: |
|
||||
xcodebuild -scheme whisper-Package -destination 'generic/platform=iOS'
|
||||
./build-xcframework.sh
|
||||
|
||||
#- name: Build objc example
|
||||
# run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos build
|
||||
- name: Build objc example
|
||||
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
|
||||
|
||||
- name: Build swiftui example
|
||||
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
|
||||
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
|
||||
|
||||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
|
||||
github.event.inputs.create_release == 'true' ||
|
||||
github.event.inputs.pre_release_tag != '' }}
|
||||
run: |
|
||||
zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework
|
||||
|
||||
- name: Upload artifacts
|
||||
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
|
||||
github.event.inputs.create_release == 'true' ||
|
||||
github.event.inputs.pre_release_tag != '' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
|
||||
name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework
|
||||
|
||||
android:
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
@ -712,31 +971,30 @@ jobs:
|
||||
cd whisper/examples/whisper.android
|
||||
./gradlew assembleRelease --no-daemon
|
||||
|
||||
# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
|
||||
# android_java:
|
||||
# runs-on: ubuntu-latest
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: set up JDK 11
|
||||
# uses: actions/setup-java@v4
|
||||
# with:
|
||||
# java-version: '11'
|
||||
# distribution: 'temurin'
|
||||
# cache: gradle
|
||||
#
|
||||
# - name: Setup Android SDK
|
||||
# uses: android-actions/setup-android@v3
|
||||
# with:
|
||||
# cmdline-tools-version: 9.0
|
||||
#
|
||||
# - name: Build
|
||||
# run: |
|
||||
# cd examples/whisper.android.java
|
||||
# chmod +x ./gradlew
|
||||
# ./gradlew assembleRelease
|
||||
android_java:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: set up JDK 11
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
java-version: '11'
|
||||
distribution: 'temurin'
|
||||
cache: gradle
|
||||
|
||||
- name: Setup Android SDK
|
||||
uses: android-actions/setup-android@v3
|
||||
with:
|
||||
cmdline-tools-version: 9.0
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
cd examples/whisper.android.java
|
||||
chmod +x ./gradlew
|
||||
./gradlew assembleRelease
|
||||
|
||||
# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
|
||||
# java:
|
||||
@ -783,7 +1041,9 @@ jobs:
|
||||
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
||||
|
||||
quantize:
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
|
||||
github.event.inputs.run_type == 'full-ci' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
@ -795,3 +1055,95 @@ jobs:
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
|
||||
|
||||
release:
|
||||
if: ${{ github.event.inputs.create_release == 'true' || github.event.inputs.pre_release_tag != '' }}
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
needs:
|
||||
- determine-tag
|
||||
- ios-xcode-build
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: ccache
|
||||
uses: hendrikmuhs/ccache-action@v1.2.16
|
||||
with:
|
||||
key: release
|
||||
evict-old-files: 1d
|
||||
|
||||
# Downloads all the artifacts from the previous jobs
|
||||
- name: Download artifacts
|
||||
id: download-artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: ./artifact
|
||||
|
||||
- name: Move artifacts
|
||||
id: move_artifacts
|
||||
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
|
||||
|
||||
- name: Create release
|
||||
id: create_release
|
||||
uses: ggml-org/action-create-release@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
tag_name: ${{ needs.determine-tag.outputs.tag_name }}
|
||||
prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
|
||||
|
||||
- name: Upload release
|
||||
id: upload_release
|
||||
uses: actions/github-script@v3
|
||||
with:
|
||||
github-token: ${{secrets.GITHUB_TOKEN}}
|
||||
script: |
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const release_id = '${{ steps.create_release.outputs.id }}';
|
||||
for (let file of await fs.readdirSync('./artifact/release')) {
|
||||
if (path.extname(file) === '.zip') {
|
||||
console.log('uploadReleaseAsset', file);
|
||||
await github.repos.uploadReleaseAsset({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
release_id: release_id,
|
||||
name: file,
|
||||
data: await fs.readFileSync(`./artifact/release/${file}`)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
coreml-base-en:
|
||||
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
|
||||
github.event.inputs.create_release == 'true' ||
|
||||
github.event.inputs.pre_release_tag != '' }}
|
||||
runs-on: macos-latest
|
||||
needs: determine-tag
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set environment variables
|
||||
id: set_vars
|
||||
run: |
|
||||
echo "MODEL_NAME=base.en" >> $GITHUB_ENV
|
||||
echo "GEN_MODEL_NAME=whisper-${{ needs.determine-tag.outputs.tag_name }}-ggml-base.en-encoder.mlmodelc" >> $GITHUB_ENV
|
||||
|
||||
- name: Download model
|
||||
run: |
|
||||
./models/download-ggml-model.sh ${{ env.MODEL_NAME }}
|
||||
|
||||
- name: Generate CoreML model
|
||||
run: |
|
||||
python3.11 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install ane_transformers openai-whisper coremltools
|
||||
./models/generate-coreml-model.sh ${{ env.MODEL_NAME }}
|
||||
|
4
.github/workflows/docker.yml
vendored
4
.github/workflows/docker.yml
vendored
@ -11,7 +11,7 @@ jobs:
|
||||
name: Push Docker image to Docker Hub
|
||||
if: github.event.pull_request.draft == false
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
strategy:
|
||||
@ -28,6 +28,8 @@ jobs:
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:qemu-v7.0.0-28
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
91
.github/workflows/examples-wasm.yml
vendored
Normal file
91
.github/workflows/examples-wasm.yml
vendored
Normal file
@ -0,0 +1,91 @@
|
||||
name: Examples WASM
|
||||
on:
|
||||
push:
|
||||
branches: ["master"]
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
concurrency:
|
||||
group: "pages"
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
deploy-wasm-github-pages:
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v4
|
||||
|
||||
- name: Setup emsdk
|
||||
uses: mymindstorm/setup-emsdk@v14
|
||||
|
||||
- name: Build WASM Examples
|
||||
# Enable for real build later in whisper.cpp
|
||||
run: |
|
||||
mkdir -p build-em && cd build-em
|
||||
emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||
make -j
|
||||
|
||||
- name: Create staging directory
|
||||
run: mkdir -p staging
|
||||
|
||||
- name: Create .nojekyll file in staging directory
|
||||
run: touch staging/.nojekyll
|
||||
|
||||
- name: Copy application files
|
||||
run: |
|
||||
build_dir=build-em/bin
|
||||
|
||||
ls ${build_dir}
|
||||
|
||||
# command.wasm
|
||||
target_dir=staging/command.wasm
|
||||
mkdir -p ${target_dir}
|
||||
cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
|
||||
cp ${build_dir}/libcommand.js ${target_dir}
|
||||
|
||||
# bench.wasm
|
||||
target_dir=staging/bench.wasm
|
||||
mkdir -p ${target_dir}
|
||||
cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
|
||||
cp ${build_dir}/libbench.js ${target_dir}
|
||||
|
||||
# stream.wasm
|
||||
target_dir=staging/stream.wasm
|
||||
mkdir -p ${target_dir}
|
||||
cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
|
||||
cp ${build_dir}/libstream.js ${target_dir}
|
||||
|
||||
# whisper.wasm (this will be the main example page)
|
||||
target_dir=staging
|
||||
mkdir -p ${target_dir}
|
||||
cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
|
||||
cp ${build_dir}/libmain.js ${target_dir}
|
||||
|
||||
# Copy Cross-Origin Isolation service worker
|
||||
cp -v examples/coi-serviceworker.js staging/
|
||||
|
||||
- name: List files in staging directory (for debugging)
|
||||
run: |
|
||||
echo "Files in staging directory:"
|
||||
find staging -type f | sort
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: ./staging
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v4
|
6
.github/workflows/examples.yml
vendored
6
.github/workflows/examples.yml
vendored
@ -10,8 +10,8 @@ on:
|
||||
- whisper.h
|
||||
|
||||
jobs:
|
||||
addon_node-ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
addon_node-ubuntu-22:
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
node-version: [ 16.x, 18.x ]
|
||||
@ -22,7 +22,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install build-essential git
|
||||
sudo apt-get install cmake
|
||||
sudo apt-get install libsdl2-dev
|
||||
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -58,3 +58,5 @@ cmake-build-debug/
|
||||
.cxx/
|
||||
.gradle/
|
||||
local.properties
|
||||
.log
|
||||
.exe
|
211
AUTHORS
211
AUTHORS
@ -1,34 +1,51 @@
|
||||
# date: Tue Apr 9 20:27:03 EEST 2024
|
||||
# date: Tue Feb 4 13:03:35 EET 2025
|
||||
# this file is auto-generated by scripts/gen-authors.sh
|
||||
|
||||
0/0 <zero@imaskeleton.me>
|
||||
0cc4m <picard12@live.de>
|
||||
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
||||
65a <10104049+65a@users.noreply.github.com>
|
||||
AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
||||
Aaron Taylor <aaron@exphat.com>
|
||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
||||
Adam Jones <domdomegg+git@gmail.com>
|
||||
Adrien Gallouët <adrien@gallouet.fr>
|
||||
Adrien Gallouët <angt@huggingface.co>
|
||||
AfryMask <AfryMask@163.com>
|
||||
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
||||
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
AidanBeltonS <aidan.belton@codeplay.com>
|
||||
Akarshan Biswas <akarshan.biswas@gmail.com>
|
||||
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
||||
Akash Mahajan <akash7190@gmail.com>
|
||||
Akash Mahajan <akashmjn@stanford.edu>
|
||||
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
||||
Alan <unknown>
|
||||
Albert Jin <albert.jin@gmail.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
||||
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
||||
Alex Evgrashin <aevgrashin@yandex.ru>
|
||||
Alex O'Connell <35843486+acon96@users.noreply.github.com>
|
||||
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
||||
Alexandru Mariuti <alex@mariuti.com>
|
||||
Alexey Kharlamov <alexey@kharlamov.biz>
|
||||
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
||||
Ali Alameh <ali.alameh@isae.edu.lb>
|
||||
Alter <0x7c48@gmail.com>
|
||||
Ananta Bastola <anantarajbastola@gmail.com>
|
||||
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
|
||||
Andreas Lubbe <git@lubbe.org>
|
||||
Andreu Huguet <andreuhuguet@gmail.com>
|
||||
Andrew Huynh <a5thuynh@gmail.com>
|
||||
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
||||
Andrew S <andrews54757@gmail.com>
|
||||
Andy Maloney <asmaloney@gmail.com>
|
||||
Anton Kostin <masguit42@users.noreply.github.com>
|
||||
@ -40,8 +57,11 @@ AustinMroz <austinmroz@utexas.edu>
|
||||
Avik Sengupta <avik@sengupta.net>
|
||||
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
||||
Baffin Lee <baffinlee@gmail.com>
|
||||
Ben Ashbaugh <ben.ashbaugh@intel.com>
|
||||
Ben Nortier <bjnortier@gmail.com>
|
||||
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
||||
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
|
||||
Binozo <70137898+Binozo@users.noreply.github.com>
|
||||
Bo-Yi Wu <appleboy.tw@gmail.com>
|
||||
Boris Bliznioukov <blib@mail.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
@ -49,47 +69,86 @@ Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
||||
Brian Murray <brian@bmurray.ca>
|
||||
CRD716 <crd716@gmail.com>
|
||||
Canis Lupus <Canis-UK@users.noreply.github.com>
|
||||
Carlos Zoido <mrgalleta@gmail.com>
|
||||
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
||||
CarterLi999 <664681047@qq.com>
|
||||
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
||||
Changyeon Kim <cyzero.kim@samsung.com>
|
||||
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
||||
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
||||
Charles Xu <charles.xu@arm.com>
|
||||
Chen Xi <xi2.chen@intel.com>
|
||||
Chen Xi <xixichen08@foxmail.com>
|
||||
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
|
||||
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
||||
Chidi Williams <williamschidi1@gmail.com>
|
||||
Chris Elrod <elrodc@gmail.com>
|
||||
Christian <12550267+iceychris@users.noreply.github.com>
|
||||
Christian Kastner <ckk@kvr.at>
|
||||
Clifford Heath <clifford.heath@gmail.com>
|
||||
Clint Herron <hanclinto@gmail.com>
|
||||
Colin <github@whoisc.cc>
|
||||
Conrad Kramer <conrad@conradkramer.com>
|
||||
Corey Earwood <iamcgn+github@gmail.com>
|
||||
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
|
||||
DAN™ <dranger003@gmail.com>
|
||||
DGdev91 <DGdev91@users.noreply.github.com>
|
||||
Damian Czaja <trojan295@protonmail.com>
|
||||
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
|
||||
Dan Johansson <dan.johansson@arm.com>
|
||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||
Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
|
||||
Daniel Ziegenberg <daniel@ziegenberg.at>
|
||||
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
||||
Dave <dave-fl@users.noreply.github.com>
|
||||
Dave Airlie <airlied@gmail.com>
|
||||
Dave Airlie <airlied@redhat.com>
|
||||
Daven Sanassy <daven@vochlea.co.uk>
|
||||
David <dnhkng@gmail.com>
|
||||
David Thorpe <djt@mutablelogic.com>
|
||||
DavidKorczynski <david@adalogics.com>
|
||||
Davidson Francis <davidsondfgl@gmail.com>
|
||||
Dener Stassun <denerstassun@gmail.com>
|
||||
Dibakar Gope <dibakar.gope@arm.com>
|
||||
Didzis Gosko <didzis@users.noreply.github.com>
|
||||
Diego Devesa <slarengh@gmail.com>
|
||||
Digipom <admin@digipom.com>
|
||||
Dimo <dimo@ieee.org>
|
||||
Djip007 <3705339+Djip007@users.noreply.github.com>
|
||||
Djip007 <djip.perois@free.fr>
|
||||
Dody Suria Wijaya <dodysw@gmail.com>
|
||||
Dou Xinpeng <15529241576@163.com>
|
||||
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
|
||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||
Duncan McConnell <ddmcconnell4@gmail.com>
|
||||
Egor Egorov <me@egorfine.com>
|
||||
Elkana Bardugo <ttv200@gmail.com>
|
||||
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||
Eric Curtin <ericcurtin17@gmail.com>
|
||||
Eric Swanson <eswanson@alloscomp.com>
|
||||
Eric Tendian <erictendian@gmail.com>
|
||||
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||
Evan Jones <evan.q.jones@gmail.com>
|
||||
Evan Martin <evan.martin@gmail.com>
|
||||
Eve <139727413+netrunnereve@users.noreply.github.com>
|
||||
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
||||
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
||||
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
|
||||
Fangjun Kuang <csukuangfj@gmail.com>
|
||||
Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
||||
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
||||
Frankie Robertson <frankier@users.noreply.github.com>
|
||||
Gang Chen <goncha@gmail.com>
|
||||
Gavin Cai <gavin1818@hotmail.com>
|
||||
George Hindle <george@georgehindle.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
Gilad S <7817232+giladgd@users.noreply.github.com>
|
||||
Gilad S <giladgd@users.noreply.github.com>
|
||||
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
||||
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||
Gleicon Moraes <gleicon@gmail.com>
|
||||
@ -98,41 +157,66 @@ Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
Hang <bebound@gmail.com>
|
||||
Haus1 <haus.xda@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
HimariO <dsfhe49854@gmail.com>
|
||||
Hong Bo PENG <penghb@cn.ibm.com>
|
||||
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
||||
Hugo <hugo@whynothugo.nl>
|
||||
Ian Bicking <ian@ianbicking.org>
|
||||
Ian Bull <irbull@eclipsesource.com>
|
||||
Ihar Hrachyshka <ihrachys@redhat.com>
|
||||
Ikko Ashimine <eltociear@gmail.com>
|
||||
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
||||
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
||||
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
||||
Ivan <nekotekina@gmail.com>
|
||||
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
||||
Ivan Gorin <ivangorin21@gmail.com>
|
||||
Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
|
||||
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||
JacobLinCool <jacoblincool@gmail.com>
|
||||
Jakub Ráček <blizzcz@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jay Binks <jaybinks@gmail.com>
|
||||
Jayant <jayantyadav202@gmail.com>
|
||||
Jeff Bolz <jbolz@nvidia.com>
|
||||
Jeroen Mostert <jeroen.mostert@cm.com>
|
||||
Jhen-Jie Hong <developer@jhen.me>
|
||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||
JidongZhang-THU <1119708529@qq.com>
|
||||
Jo Liss <joliss42@gmail.com>
|
||||
Joe Todd <joe.todd@codeplay.com>
|
||||
Johan <jr.raffin@gmail.com>
|
||||
Johannes Gäßler <johannesg@5d6.de>
|
||||
John Balis <phobossystems@gmail.com>
|
||||
JohnnyB <jboero@users.noreply.github.com>
|
||||
Jonathan Soo <jcsoo@agora.com>
|
||||
Jonno <1160532+razodactyl@users.noreply.github.com>
|
||||
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
||||
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
||||
Josh Bleecher Snyder <josharian@gmail.com>
|
||||
Josscii <jossciiweiyi@gmail.com>
|
||||
Judd <foldl@users.noreply.github.com>
|
||||
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
||||
Jun Hee Yoo <contact.jhyoo@gmail.com>
|
||||
Junil Kim <logyourself@gmail.com>
|
||||
Justina Cho <justcho5@gmail.com>
|
||||
Justine Tunney <jtunney@gmail.com>
|
||||
Justine Tunney <jtunney@mozilla.com>
|
||||
KITAITI Makoto <KitaitiMakoto@gmail.com>
|
||||
KP Kaiser <kirk@zothcorp.com>
|
||||
Kamilake <exjang0@gmail.com>
|
||||
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
||||
Karthick <j.karthic2004@gmail.com>
|
||||
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
||||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||
Kendrick Taylor <kendrick@circuitsix.com>
|
||||
Kevin Brothaler <admin@digipom.com>
|
||||
Kevin Gibbons <bakkot@gmail.com>
|
||||
Konosuke Sakai <konosuke@konosuke.work>
|
||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||
Kreijstal <rainb@tfwno.gf>
|
||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||
@ -147,56 +231,110 @@ Luis Herrera <herrera-luis@users.noreply.github.com>
|
||||
Lukas Rist <glaslos@gmail.com>
|
||||
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
||||
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
||||
Ma Mingfei <mingfei.ma@intel.com>
|
||||
Maciek <maciek.mab122@gmail.com>
|
||||
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
|
||||
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
||||
Mark Karpelès <MagicalTux@users.noreply.github.com>
|
||||
Mark Zhuang <zhuangqiubin@gmail.com>
|
||||
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
||||
Martin Delille <martin@delille.org>
|
||||
Martin Warnaar <martinwarnaar@gmail.com>
|
||||
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
|
||||
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
||||
Mathieu Baudier <mbaudier@argeo.org>
|
||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||
Matija Pevec <mightymatth@users.noreply.github.com>
|
||||
Matt Stephenson <mstephenson6@users.noreply.github.com>
|
||||
Max Krasnyansky <max.krasnyansky@gmail.com>
|
||||
Max Krasnyansky <quic_maxk@quicinc.com>
|
||||
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
||||
Meng, Hengyu <hengyu.meng@intel.com>
|
||||
Mengqing Cao <cmq0113@163.com>
|
||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||
Michael Rienstra <mrienstra@gmail.com>
|
||||
Mikhail Grigorev <sleuthhound@gmail.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Mohit Agarwal <mohit@sdf.org>
|
||||
Molly Sophia <mollysophia379@gmail.com>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
NETZkultur GmbH <mulholland@netzkultur.de>
|
||||
Natsu <chino@hotococoa.moe>
|
||||
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
||||
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||
Neuman Vong <neuman.vong@gmail.com>
|
||||
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
||||
Nicholas Albion <nalbion@yahoo.com>
|
||||
Nico Bosshard <nico@bosshome.ch>
|
||||
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
||||
Niels Mayer <Niels.Mayer@gmail.com>
|
||||
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
||||
Nikolaj Olsson <nikse.dk@gmail.com>
|
||||
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
||||
Oleg Sidorov <me@whitebox.io>
|
||||
Oleg Sidorov <oleg@sidorov.nl>
|
||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||
Ondrej Kokes <ondrej.kokes@gmail.com>
|
||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||
PAB <pierreantoine.bannier@gmail.com>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Pedro Probst <pprobst@insiberia.net>
|
||||
Peng <hzp1024@qq.com>
|
||||
Peter <peter277@users.noreply.github.com>
|
||||
Philipp Zabel <philipp.zabel@gmail.com>
|
||||
Philippe Normand <phil@base-art.net>
|
||||
Philippe Normand <philn@igalia.com>
|
||||
Plamen Minev <pacominev@gmail.com>
|
||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
||||
R0CKSTAR <xiaodong.ye@mthreads.com>
|
||||
R0CKSTAR <yeahdongcn@gmail.com>
|
||||
Radoslav Gerganov <rgerganov@gmail.com>
|
||||
Radosław Gryta <radek.gryta@gmail.com>
|
||||
Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
|
||||
Raiya Araki <83504221+rai62@users.noreply.github.com>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
||||
RelatedTitle <r3latedtitle@gmail.com>
|
||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||
Rich Jones <miserlou@gmail.com>
|
||||
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
||||
Robin <robin.xw@hotmail.com>
|
||||
Roddur Dasgupta <roddurd@gmail.com>
|
||||
Roland Rabien <figbug@gmail.com>
|
||||
Romain Biessy <romain.biessy@codeplay.com>
|
||||
Ronsor <ronsor@ronsor.pw>
|
||||
Rotem Dan <rotemdan@gmail.com>
|
||||
Ryan Hitchman <hitchmanr@gmail.com>
|
||||
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
||||
RyanChang <ftes90015@gmail.com>
|
||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
||||
SXX <sxx1136965276@gmail.com>
|
||||
Sacha Arbonel <sacha.arbonel@hotmail.fr>
|
||||
Salman Faroz <stsfaroz@gmail.com>
|
||||
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
||||
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
||||
Sam Pullara <spullara@gmail.com>
|
||||
Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
|
||||
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
||||
Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
Sergio López <slp@redhat.com>
|
||||
Sergio López <slp@sinrega.org>
|
||||
Shanshan Shen <467638484@qq.com>
|
||||
Shijie <821898965@qq.com>
|
||||
Shupei Fan <dymarkfan@outlook.com>
|
||||
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||
Simon Moisselin <simon.moisstoll@gmail.com>
|
||||
Sindre Sorhus <sindresorhus@gmail.com>
|
||||
Slava Primenko <primenko.s@gmail.com>
|
||||
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
|
||||
Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
|
||||
Stefan Sydow <s.sydow@heinlein-video.de>
|
||||
Stefan Sydow <stefan@sydow.email>
|
||||
Syahmi Azhar <prsyahmi@gmail.com>
|
||||
Syed Jafri <syedjafri97@gmail.com>
|
||||
Sơn Phan Trung <phantrungson17@gmail.com>
|
||||
@ -205,37 +343,63 @@ Takeshi Inoue <inoue.takeshi@gmail.com>
|
||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||
Taras Glek <taras@thegp.com>
|
||||
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
||||
Thamster <Thamster@users.noreply.github.com>
|
||||
Thijs Raymakers <thijs@raymakers.nl>
|
||||
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
||||
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
||||
Tienshiao Ma <tienshiao@tienshiao.org>
|
||||
Tim Miller <drasticactions@users.noreply.github.com>
|
||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||
Tobrun <tobrun.van.nuland@gmail.com>
|
||||
Todd <taf2@users.noreply.github.com>
|
||||
Toliver <teejae@gmail.com>
|
||||
Tong Li <31761981+litongjava@users.noreply.github.com>
|
||||
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
||||
Topping1 <78745143+Topping1@users.noreply.github.com>
|
||||
Travis Cline <travis.cline@gmail.com>
|
||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||
UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
|
||||
Vadim Peretokin <vperetokin@hey.com>
|
||||
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
||||
Vin Misra <vinith@alum.mit.edu>
|
||||
Vulcan <93451215+trholding@users.noreply.github.com>
|
||||
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
||||
William Tambellini <william.tambellini@gmail.com>
|
||||
William Tambellini <wtambellini@sdl.com>
|
||||
Wilson Silva <wilson.dsigns@gmail.com>
|
||||
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
||||
Xuan Son Nguyen <thichthat@gmail.com>
|
||||
Yajing Tang <phillis@google.com>
|
||||
Yang Shen <aplshenyang@gmail.com>
|
||||
Yunès <jean.baptiste.yunes@free.fr>
|
||||
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
||||
Yusuf Redžić <48274562+redzic@users.noreply.github.com>
|
||||
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
||||
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
||||
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
||||
Zhiyuan Li <uniartisan2017@gmail.com>
|
||||
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
||||
Zollner <24618122+Zolliner@users.noreply.github.com>
|
||||
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
||||
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
||||
agray3 <agray3@users.noreply.github.com>
|
||||
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
||||
aldorof <aldorof@users.noreply.github.com>
|
||||
alonfaraj <alonfaraj@gmail.com>
|
||||
amd-dwang <dong.wang@amd.com>
|
||||
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
||||
andypayne <apayne@gmail.com>
|
||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
||||
arizhih <40765267+arizhih@users.noreply.github.com>
|
||||
automaticcat <daogiatuank54@gmail.com>
|
||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||
be-next <jerome.ramette@gmail.com>
|
||||
bert hubert <bert@hubertnet.nl>
|
||||
billyct <billy_allen@126.com>
|
||||
bmwl <brian.marshall@tolko.com>
|
||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||
bocytko <bocytko+github@gmail.com>
|
||||
@ -248,7 +412,9 @@ byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||
cdosoftei <ciprian.dosoftei@gmail.com>
|
||||
clach04 <Chris.Clark@actian.com>
|
||||
compilade <113953597+compilade@users.noreply.github.com>
|
||||
compilade <git@compilade.net>
|
||||
conradg <conradjgodfrey@gmail.com>
|
||||
crummyh <elijah@crums.us>
|
||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||
denersc <denerstassun@gmail.com>
|
||||
dscripka <dscripka@users.noreply.github.com>
|
||||
@ -256,28 +422,55 @@ duthils <duthils@duthils.net>
|
||||
ecneladis <ecneladis@users.noreply.github.com>
|
||||
faker <nspyia2002@gmail.com>
|
||||
fitzsim <fitzsim@fitzsim.org>
|
||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
genevera (she/her) <genevera@users.noreply.github.com>
|
||||
geniusnut <geniusnut@gmail.com>
|
||||
gilbertgong <gilbert.gong@gmail.com>
|
||||
gn64 <yukikaze.jp@gmail.com>
|
||||
goldwaving <77494627+goldwaving@users.noreply.github.com>
|
||||
greeshmay <greeshmay@gmail.com>
|
||||
haopeng <657407891@qq.com>
|
||||
hipudding <huafengchun@gmail.com>
|
||||
hsinhoyeh <yhh92u@gmail.com>
|
||||
hydai <z54981220@gmail.com>
|
||||
iamthad <thadeus.j.fleming@gmail.com>
|
||||
issixx <46835150+issixx@users.noreply.github.com>
|
||||
james wolf <contractorwolf@hotmail.com>
|
||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
||||
jettoblack <jettoblack@gmail.com>
|
||||
jiez <373447296@qq.com>
|
||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
||||
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
||||
jwijffels <jwijffels@bnosac.be>
|
||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||
kamranjon <kamranjon@gmail.com>
|
||||
katsu560 <katsu560oo-@docomo.ne.jp>
|
||||
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
||||
keyehzy <msamuel@aluno.puc-rio.br>
|
||||
kunnis <kunnis@users.noreply.github.com>
|
||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||
leejet <leejet714@gmail.com>
|
||||
leo-pony <nengjunma@outlook.com>
|
||||
lhez <quic_lih@quicinc.com>
|
||||
litong <31761981+litongjava@users.noreply.github.com>
|
||||
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||
lnyan <lkwq007@gmail.com>
|
||||
luoyu-intel <yu.luo@intel.com>
|
||||
m.bell <m.bell@techsmith.com>
|
||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
||||
mashizora <30516315+mashizora@users.noreply.github.com>
|
||||
matt23654 <matthew.webber@protonmail.com>
|
||||
matteo <matteogeniaccio@yahoo.it>
|
||||
mgrachten <maarten@grachten.eu>
|
||||
mkiol <mkiol@users.noreply.github.com>
|
||||
mky_coder <47767389+mkycoder@users.noreply.github.com>
|
||||
novag <7754358+novag@users.noreply.github.com>
|
||||
pajowu <pajowu@pajowu.de>
|
||||
pengxin99 <pengxin.yuan@intel.com>
|
||||
petterreinholdtsen <pere-github@hungry.com>
|
||||
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
||||
rlapray <lapray.romain@gmail.com>
|
||||
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
@ -287,15 +480,31 @@ shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
||||
slaren <slarengh@gmail.com>
|
||||
slashlib <slashlib@users.noreply.github.com>
|
||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||
someone13574 <81528246+someone13574@users.noreply.github.com>
|
||||
st-gr <38470677+st-gr@users.noreply.github.com>
|
||||
stduhpf <stephduh@live.fr>
|
||||
stormofice <58337328+stormofice@users.noreply.github.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thefinaldegree <thefinaldegree@gmail.com>
|
||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
||||
toboil-features <160222185+toboil-features@users.noreply.github.com>
|
||||
trixirt <trix@redhat.com>
|
||||
ulatekh <ulatekh@yahoo.com>
|
||||
undef <undefdev@gmail.com>
|
||||
uvos <devnull@uvos.xyz>
|
||||
uvos <philipp@uvos.xyz>
|
||||
valVk <valVk@users.noreply.github.com>
|
||||
venkr <venkateshrameshkumar+1@gmail.com>
|
||||
vicalloy <zbirder@gmail.com>
|
||||
wangshuai09 <391746016@qq.com>
|
||||
woachk <24752637+woachk@users.noreply.github.com>
|
||||
xctan <axunlei@gmail.com>
|
||||
xdrudis <xavierdrudis@yahoo.es>
|
||||
yuri@FreeBSD <yuri@FreeBSD>
|
||||
zhangjixiong <code.zjx@gmail.com>
|
||||
zhentaoyu <zhentao.yu@intel.com>
|
||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||
zhouwg <zhouwg2000@gmail.com>
|
||||
谢乃闻 <sienaiwun@users.noreply.github.com>
|
||||
布客飞龙 <562826179@qq.com>
|
||||
Артём Земляк <azemlyak@smart-consulting.ru>
|
||||
|
@ -1,6 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||
project("whisper.cpp" C CXX)
|
||||
project("whisper.cpp" VERSION 1.7.4)
|
||||
project("whisper.cpp" VERSION 1.7.5)
|
||||
include(CheckIncludeFileCXX)
|
||||
|
||||
set(SOVERSION 1)
|
||||
@ -38,8 +38,13 @@ if (EMSCRIPTEN)
|
||||
|
||||
# TODO: without these, we get the following error:
|
||||
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s TOTAL_STACK=5242880")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s TOTAL_STACK=5242880")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
|
||||
else()
|
||||
if (MINGW)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
@ -62,7 +67,8 @@ option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings"
|
||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||
|
||||
# build
|
||||
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||
option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
|
||||
|
||||
# sanitizers
|
||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||
@ -121,7 +127,15 @@ whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
||||
#
|
||||
|
||||
if (NOT TARGET ggml)
|
||||
add_subdirectory(ggml)
|
||||
if (WHISPER_USE_SYSTEM_GGML)
|
||||
find_package(ggml REQUIRED)
|
||||
if (NOT ggml_FOUND)
|
||||
message(FATAL_ERROR "System-installed GGML library not found.")
|
||||
endif()
|
||||
add_library(ggml ALIAS ggml::ggml)
|
||||
else()
|
||||
add_subdirectory(ggml)
|
||||
endif()
|
||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
||||
endif()
|
||||
add_subdirectory(src)
|
||||
@ -176,8 +190,8 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
#
|
||||
|
||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
#include(CTest)
|
||||
#add_subdirectory(tests)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (WHISPER_BUILD_EXAMPLES)
|
||||
|
13
Makefile
13
Makefile
@ -18,17 +18,6 @@ samples:
|
||||
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
||||
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
|
||||
@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
|
||||
@echo "Converting to 16-bit WAV ..."
|
||||
@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
||||
@rm samples/*.ogg
|
||||
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
|
||||
@rm samples/mm1.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
|
||||
@rm samples/a13.mp3
|
||||
@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
|
||||
@rm samples/diffusion2023-07-03.flac
|
||||
|
||||
#
|
||||
# Models
|
||||
@ -59,7 +48,7 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 larg
|
||||
@echo "Running $@ on all samples in ./samples ..."
|
||||
@echo "==============================================="
|
||||
@echo ""
|
||||
@for f in samples/*.wav; do \
|
||||
@for f in samples/*$(.flac .mp3 .ogg .wav); do \
|
||||
echo "----------------------------------------------" ; \
|
||||
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
||||
echo "----------------------------------------------" ; \
|
||||
|
@ -1,19 +0,0 @@
|
||||
// swift-tools-version:5.5
|
||||
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "whisper",
|
||||
platforms: [
|
||||
.macOS(.v12),
|
||||
.iOS(.v14),
|
||||
.watchOS(.v4),
|
||||
.tvOS(.v14)
|
||||
],
|
||||
products: [
|
||||
.library(name: "whisper", targets: ["whisper"]),
|
||||
],
|
||||
targets: [
|
||||
.systemLibrary(name: "whisper", pkgConfig: "whisper"),
|
||||
]
|
||||
)
|
62
README.md
62
README.md
@ -7,14 +7,17 @@
|
||||
[](https://conan.io/center/whisper-cpp)
|
||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||
|
||||
Stable: [v1.7.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
> [!NOTE]
|
||||
> New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788
|
||||
|
||||
Stable: [v1.7.5](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.5) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
|
||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||
|
||||
- Plain C/C++ implementation without dependencies
|
||||
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
||||
- AVX intrinsics support for x86 architectures
|
||||
- VSX intrinsics support for POWER architectures
|
||||
- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
|
||||
- Mixed F16 / F32 precision
|
||||
- [Integer quantization support](#quantization)
|
||||
- Zero memory allocations at runtime
|
||||
@ -136,6 +139,20 @@ make -j large-v3-turbo
|
||||
| medium | 1.5 GiB | ~2.1 GB |
|
||||
| large | 2.9 GiB | ~3.9 GB |
|
||||
|
||||
## POWER VSX Intrinsics
|
||||
|
||||
`whisper.cpp` supports POWER architectures and includes code which
|
||||
significantly speeds operation on Linux running on POWER9/10, making it
|
||||
capable of faster-than-realtime transcription on underclocked Raptor
|
||||
Talos II. Ensure you have a BLAS package installed, and replace the
|
||||
standard cmake setup with:
|
||||
|
||||
```bash
|
||||
# build with GGML_BLAS defined
|
||||
cmake -B build -DGGML_BLAS=1
|
||||
cmake --build build --config Release
|
||||
./build/bin/whisper-cli [ .. etc .. ]
|
||||
|
||||
## Quantization
|
||||
|
||||
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
||||
@ -167,11 +184,11 @@ speed-up - more than x3 faster compared with CPU-only execution. Here are the in
|
||||
```
|
||||
|
||||
- To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
|
||||
- Python 3.10 is recommended.
|
||||
- Python 3.11 is recommended.
|
||||
- MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
|
||||
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
|
||||
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
|
||||
- To activate the environment, use: `conda activate py310-whisper`
|
||||
- To create an environment, use: `conda create -n py311-whisper python=3.11 -y`
|
||||
- To activate the environment, use: `conda activate py311-whisper`
|
||||
|
||||
- Generate a Core ML model. For example, to generate a `base.en` model, use:
|
||||
|
||||
@ -360,6 +377,38 @@ Run the inference examples as usual, for example:
|
||||
- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
|
||||
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
|
||||
|
||||
## Docker
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Docker must be installed and running on your system.
|
||||
- Create a folder to store big models & intermediate files (ex. /whisper/models)
|
||||
|
||||
### Images
|
||||
|
||||
We have two Docker images available for this project:
|
||||
|
||||
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
|
||||
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
|
||||
|
||||
### Usage
|
||||
|
||||
```shell
|
||||
# download model and persist it in a local folder
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
whisper.cpp:main "./models/download-ggml-model.sh base /models"
|
||||
# transcribe an audio file
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
-v path/to/audios:/audios \
|
||||
whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
|
||||
# transcribe an audio file in samples folder
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
|
||||
```
|
||||
|
||||
## Installing with Conan
|
||||
|
||||
You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
|
||||
@ -378,7 +427,8 @@ For detailed instructions on how to use Conan, please refer to the [Conan docume
|
||||
|
||||
This is a naive example of performing real-time inference on audio from your microphone.
|
||||
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
|
||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||
You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly.
|
||||
|
||||
```bash
|
||||
cmake -B build -DWHISPER_SDL2=ON
|
||||
|
@ -1,5 +0,0 @@
|
||||
module whisper [system] {
|
||||
header "whisper.h"
|
||||
link "whisper"
|
||||
export *
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <whisper.h>
|
||||
|
@ -11,11 +11,11 @@ UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
||||
BUILD_DIR := build
|
||||
BUILD_DIR := build_go
|
||||
MODELS_DIR := models
|
||||
EXAMPLES_DIR := $(wildcard examples/*)
|
||||
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
||||
LIBRARY_PATH := $(abspath ../..)
|
||||
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
|
||||
|
||||
ifeq ($(GGML_CUDA),1)
|
||||
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
|
||||
@ -29,8 +29,10 @@ endif
|
||||
all: clean whisper examples
|
||||
|
||||
whisper: mkdir
|
||||
@echo Build whisper
|
||||
@${MAKE} -C ../.. libwhisper.a
|
||||
cmake -S ../.. -B ../../${BUILD_DIR} \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
cmake --build ../../${BUILD_DIR} --target whisper
|
||||
|
||||
test: model-small whisper modtidy
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
|
@ -31,7 +31,7 @@ func main() {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := context.Process(samples, nil, nil); err != nil {
|
||||
if err := context.Process(samples, nil, nil, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -9,22 +9,23 @@ import (
|
||||
// ContextForSignal returns a context object which is cancelled when a signal
|
||||
// is received. It returns nil if no signal parameter is provided
|
||||
func ContextForSignal(signals ...os.Signal) context.Context {
|
||||
if len(signals) == 0 {
|
||||
return nil
|
||||
}
|
||||
if len(signals) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ch := make(chan os.Signal)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
// Send message on channel when signal received
|
||||
signal.Notify(ch, signals...)
|
||||
// Send message on channel when signal received
|
||||
signal.Notify(ch, signals...)
|
||||
|
||||
// When any signal received, call cancel
|
||||
go func() {
|
||||
<-ch
|
||||
cancel()
|
||||
}()
|
||||
// When any signal is received, call cancel
|
||||
go func() {
|
||||
<-ch
|
||||
cancel()
|
||||
}()
|
||||
|
||||
// Return success
|
||||
return ctx
|
||||
// Return success
|
||||
return ctx
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
@ -17,14 +18,27 @@ import (
|
||||
// CONSTANTS
|
||||
|
||||
const (
|
||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
||||
srcExt = ".bin" // Filename extension
|
||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
|
||||
srcExt = ".bin" // Filename extension
|
||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||
)
|
||||
|
||||
var (
|
||||
// The models which will be downloaded, if no model is specified as an argument
|
||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
|
||||
modelNames = []string{
|
||||
"tiny", "tiny-q5_1", "tiny-q8_0",
|
||||
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
|
||||
"base", "base-q5_1", "base-q8_0",
|
||||
"base.en", "base.en-q5_1", "base.en-q8_0",
|
||||
"small", "small-q5_1", "small-q8_0",
|
||||
"small.en", "small.en-q5_1", "small.en-q8_0",
|
||||
"medium", "medium-q5_0", "medium-q8_0",
|
||||
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
|
||||
"large-v1",
|
||||
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
|
||||
"large-v3", "large-v3-q5_0",
|
||||
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
|
||||
}
|
||||
)
|
||||
|
||||
var (
|
||||
@ -44,7 +58,25 @@ var (
|
||||
func main() {
|
||||
flag.Usage = func() {
|
||||
name := filepath.Base(flag.CommandLine.Name())
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
|
||||
fmt.Fprintf(flag.CommandLine.Output(), `
|
||||
Usage: %s [options] [<model>...]
|
||||
|
||||
Options:
|
||||
-out string Specify the output folder where models will be saved.
|
||||
Default: Current working directory.
|
||||
-timeout duration Set the maximum duration for downloading a model.
|
||||
Example: 10m, 1h (default: 30m0s).
|
||||
-quiet Suppress all output except errors.
|
||||
|
||||
Examples:
|
||||
1. Download a specific model:
|
||||
%s -out ./models tiny-q8_0
|
||||
|
||||
2. Download all models:
|
||||
%s -out ./models
|
||||
|
||||
`, name, name, name)
|
||||
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
flag.Parse()
|
||||
@ -114,23 +146,87 @@ func GetOut() (string, error) {
|
||||
// GetModels returns the list of models to download
|
||||
func GetModels() []string {
|
||||
if flag.NArg() == 0 {
|
||||
return modelNames
|
||||
} else {
|
||||
return flag.Args()
|
||||
fmt.Println("No model specified.")
|
||||
fmt.Println("Preparing to download all models...")
|
||||
|
||||
// Calculate total download size
|
||||
fmt.Println("Calculating total download size...")
|
||||
totalSize, err := CalculateTotalDownloadSize(modelNames)
|
||||
if err != nil {
|
||||
fmt.Println("Error calculating download sizes:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
|
||||
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
|
||||
fmt.Println("Would you like to download all models? (y/N)")
|
||||
|
||||
// Prompt for user input
|
||||
var response string
|
||||
fmt.Scanln(&response)
|
||||
if response != "y" && response != "Y" {
|
||||
fmt.Println("Aborting. Specify a model to download.")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
return modelNames // Return all models if confirmed
|
||||
}
|
||||
return flag.Args() // Return specific models if arguments are provided
|
||||
}
|
||||
|
||||
func CalculateTotalDownloadSize(models []string) (int64, error) {
|
||||
var totalSize int64
|
||||
client := http.Client{}
|
||||
|
||||
for _, model := range models {
|
||||
modelURL, err := URLForModel(model)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Issue a HEAD request to get the file size
|
||||
req, err := http.NewRequest("HEAD", modelURL, nil)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
|
||||
continue
|
||||
}
|
||||
|
||||
size := resp.ContentLength
|
||||
totalSize += size
|
||||
}
|
||||
return totalSize, nil
|
||||
}
|
||||
|
||||
// URLForModel returns the URL for the given model on huggingface.co
|
||||
func URLForModel(model string) (string, error) {
|
||||
// Ensure "ggml-" prefix is added only once
|
||||
if !strings.HasPrefix(model, "ggml-") {
|
||||
model = "ggml-" + model
|
||||
}
|
||||
|
||||
// Ensure ".bin" extension is added only once
|
||||
if filepath.Ext(model) != srcExt {
|
||||
model += srcExt
|
||||
}
|
||||
|
||||
// Parse the base URL
|
||||
url, err := url.Parse(srcUrl)
|
||||
if err != nil {
|
||||
return "", err
|
||||
} else {
|
||||
url.Path = filepath.Join(url.Path, model)
|
||||
}
|
||||
|
||||
// Ensure no trailing slash in the base URL
|
||||
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
|
||||
return url.String(), nil
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
|
||||
// Process the data
|
||||
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
||||
context.ResetTimings()
|
||||
if err := context.Process(data, cb, nil); err != nil {
|
||||
if err := context.Process(data, nil, cb, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -71,6 +71,10 @@ func (context *context) Language() string {
|
||||
return whisper.Whisper_lang_str(context.params.Language())
|
||||
}
|
||||
|
||||
func (context *context) DetectedLanguage() string {
|
||||
return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id())
|
||||
}
|
||||
|
||||
// Set translate flag
|
||||
func (context *context) SetTranslate(v bool) {
|
||||
context.params.SetTranslate(v)
|
||||
@ -189,6 +193,7 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
|
||||
// Process new sample data and return any errors
|
||||
func (context *context) Process(
|
||||
data []float32,
|
||||
callEncoderBegin EncoderBeginCallback,
|
||||
callNewSegment SegmentCallback,
|
||||
callProgress ProgressCallback,
|
||||
) error {
|
||||
@ -203,7 +208,20 @@ func (context *context) Process(
|
||||
// We don't do parallel processing at the moment
|
||||
processors := 0
|
||||
if processors > 1 {
|
||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin,
|
||||
func(new int) {
|
||||
if callNewSegment != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
for i := s0; i < num_segments; i++ {
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin,
|
||||
func(new int) {
|
||||
if callNewSegment != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
@ -211,22 +229,11 @@ func (context *context) Process(
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
||||
if callNewSegment != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
for i := s0; i < num_segments; i++ {
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
}, func(progress int) {
|
||||
if callProgress != nil {
|
||||
callProgress(progress)
|
||||
}
|
||||
}
|
||||
}, func(progress int) {
|
||||
if callProgress != nil {
|
||||
callProgress(progress)
|
||||
}
|
||||
}); err != nil {
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -88,6 +88,37 @@ func TestProcess(t *testing.T) {
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
err = context.Process(data, nil, nil)
|
||||
err = context.Process(data, nil, nil, nil)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
func TestDetectedLanguage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
fh, err := os.Open(SamplePath)
|
||||
assert.NoError(err)
|
||||
defer fh.Close()
|
||||
|
||||
// Decode the WAV file - load the full buffer
|
||||
dec := wav.NewDecoder(fh)
|
||||
buf, err := dec.FullPCMBuffer()
|
||||
assert.NoError(err)
|
||||
assert.Equal(uint16(1), dec.NumChans)
|
||||
|
||||
data := buf.AsFloat32Buffer().Data
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
err = context.Process(data, nil, nil, nil)
|
||||
assert.NoError(err)
|
||||
|
||||
expectedLanguage := "en"
|
||||
actualLanguage := context.DetectedLanguage()
|
||||
assert.Equal(expectedLanguage, actualLanguage)
|
||||
}
|
||||
|
@ -16,6 +16,10 @@ type SegmentCallback func(Segment)
|
||||
// processing. It is called during the Process function
|
||||
type ProgressCallback func(int)
|
||||
|
||||
// EncoderBeginCallback is the callback function for checking if we want to
|
||||
// continue processing. It is called during the Process function
|
||||
type EncoderBeginCallback func() bool
|
||||
|
||||
// Model is the interface to a whisper model. Create a new model with the
|
||||
// function whisper.New(string)
|
||||
type Model interface {
|
||||
@ -31,12 +35,13 @@ type Model interface {
|
||||
Languages() []string
|
||||
}
|
||||
|
||||
// Context is the speach recognition context.
|
||||
// Context is the speech recognition context.
|
||||
type Context interface {
|
||||
SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
|
||||
SetTranslate(bool) // Set translate flag
|
||||
IsMultilingual() bool // Return true if the model is multilingual.
|
||||
Language() string // Get language
|
||||
DetectedLanguage() string // Get detected language
|
||||
|
||||
SetOffset(time.Duration) // Set offset
|
||||
SetDuration(time.Duration) // Set duration
|
||||
@ -58,7 +63,7 @@ type Context interface {
|
||||
// Process mono audio data and return any errors.
|
||||
// If defined, newly generated segments are passed to the
|
||||
// callback function during processing.
|
||||
Process([]float32, SegmentCallback, ProgressCallback) error
|
||||
Process([]float32, EncoderBeginCallback, SegmentCallback, ProgressCallback) error
|
||||
|
||||
// After process is called, return segments until the end of the stream
|
||||
// is reached, when io.EOF is returned.
|
||||
|
@ -9,7 +9,7 @@ import (
|
||||
// CGO
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
|
||||
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++ -fopenmp
|
||||
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
||||
#include <whisper.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -25,13 +25,13 @@ sourceSets {
|
||||
}
|
||||
|
||||
tasks.register('copyLibwhisperDynlib', Copy) {
|
||||
from '../../build'
|
||||
include 'libwhisper.dynlib'
|
||||
from '../../build/src'
|
||||
include 'libwhisper.dylib'
|
||||
into 'build/generated/resources/main/darwin'
|
||||
}
|
||||
|
||||
tasks.register('copyLibwhisperSo', Copy) {
|
||||
from '../../build'
|
||||
from '../../build/src'
|
||||
include 'libwhisper.so'
|
||||
into 'build/generated/resources/main/linux-x86-64'
|
||||
}
|
||||
@ -55,7 +55,12 @@ java {
|
||||
withJavadocJar()
|
||||
}
|
||||
|
||||
sourcesJar() {
|
||||
dependsOn copyLibs
|
||||
}
|
||||
|
||||
jar {
|
||||
dependsOn copyLibs
|
||||
exclude '**/whisper_java.exp', '**/whisper_java.lib'
|
||||
}
|
||||
|
||||
@ -67,6 +72,9 @@ tasks.withType(Test) {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
test.dependsOn copyLibs
|
||||
processResources.dependsOn copyLibs
|
||||
|
||||
dependencies {
|
||||
implementation "net.java.dev.jna:jna:5.13.0"
|
||||
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
|
||||
|
0
bindings/java/gradlew
vendored
Normal file → Executable file
0
bindings/java/gradlew
vendored
Normal file → Executable file
@ -0,0 +1,24 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
/**
|
||||
* Presets for alignment heads in DTW token timestamps
|
||||
*/
|
||||
public class WhisperConstants {
|
||||
// Alignment heads presets
|
||||
public static final int WHISPER_AHEADS_NONE = 0;
|
||||
public static final int WHISPER_AHEADS_TINY_EN = 1;
|
||||
public static final int WHISPER_AHEADS_TINY = 2;
|
||||
public static final int WHISPER_AHEADS_BASE_EN = 3;
|
||||
public static final int WHISPER_AHEADS_BASE = 4;
|
||||
public static final int WHISPER_AHEADS_SMALL_EN = 5;
|
||||
public static final int WHISPER_AHEADS_SMALL = 6;
|
||||
public static final int WHISPER_AHEADS_MEDIUM_EN = 7;
|
||||
public static final int WHISPER_AHEADS_MEDIUM = 8;
|
||||
public static final int WHISPER_AHEADS_LARGE_V1 = 9;
|
||||
public static final int WHISPER_AHEADS_LARGE_V2 = 10;
|
||||
public static final int WHISPER_AHEADS_LARGE_V3 = 11;
|
||||
public static final int WHISPER_AHEADS_LARGE_V3_TURBO = 12;
|
||||
public static final int WHISPER_AHEADS_CUSTOM = 13;
|
||||
public static final int WHISPER_AHEADS_N_TOP_MOST = 14;
|
||||
public static final int WHISPER_AHEADS_COUNT = 15;
|
||||
}
|
@ -1,7 +1,9 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import com.sun.jna.NativeLong;
|
||||
import com.sun.jna.Structure;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.ggml.GgmlType;
|
||||
import io.github.ggerganov.whispercpp.WhisperModel;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
@ -9,33 +11,26 @@ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperContext extends Structure {
|
||||
int t_load_us = 0;
|
||||
int t_start_us = 0;
|
||||
public NativeLong t_load_us;
|
||||
public NativeLong t_start_us;
|
||||
|
||||
/** weight type (FP32 / FP16 / QX) */
|
||||
GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
||||
public GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
||||
/** intermediate type (FP32 or FP16) */
|
||||
GgmlType itype = GgmlType.GGML_TYPE_F16;
|
||||
public GgmlType itype = GgmlType.GGML_TYPE_F16;
|
||||
|
||||
// WhisperModel model;
|
||||
public PointerByReference model;
|
||||
// whisper_vocab vocab;
|
||||
// whisper_state * state = nullptr;
|
||||
public PointerByReference vocab;
|
||||
public PointerByReference state;
|
||||
public WhisperContextParams.ByValue params;
|
||||
|
||||
public Pointer model;
|
||||
public Pointer vocab;
|
||||
public Pointer state;
|
||||
|
||||
/** populated by whisper_init_from_file_with_params() */
|
||||
String path_model;
|
||||
WhisperContextParams params;
|
||||
public Pointer path_model;
|
||||
|
||||
// public static class ByReference extends WhisperContext implements Structure.ByReference {
|
||||
// }
|
||||
//
|
||||
// public static class ByValue extends WhisperContext implements Structure.ByValue {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// protected List<String> getFieldOrder() {
|
||||
// return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
|
||||
// }
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return List.of("t_load_us", "t_start_us", "wtype", "itype",
|
||||
"params", "model", "vocab", "state", "path_model");
|
||||
}
|
||||
}
|
||||
|
@ -43,11 +43,11 @@ public class WhisperCpp implements AutoCloseable {
|
||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||
* @param params - params to use when initialising the context
|
||||
*/
|
||||
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
public void initContext(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
|
||||
initContextImpl(modelPath, params);
|
||||
}
|
||||
|
||||
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
private void initContextImpl(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
|
||||
if (ctx != null) {
|
||||
lib.whisper_free(ctx);
|
||||
}
|
||||
@ -69,15 +69,13 @@ public class WhisperCpp implements AutoCloseable {
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_context_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
* Returns a ByValue instance to ensure proper parameter passing to native code.
|
||||
*/
|
||||
public WhisperContextParams getContextDefaultParams() {
|
||||
paramsPointer = lib.whisper_context_default_params_by_ref();
|
||||
WhisperContextParams params = new WhisperContextParams(paramsPointer);
|
||||
params.read();
|
||||
return params;
|
||||
public WhisperContextParams.ByValue getContextDefaultParams() {
|
||||
WhisperContextParams.ByValue valueParams = new WhisperContextParams.ByValue(
|
||||
lib.whisper_context_default_params_by_ref());
|
||||
valueParams.read();
|
||||
return valueParams;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -88,7 +86,7 @@ public class WhisperCpp implements AutoCloseable {
|
||||
*
|
||||
* @param strategy - GREEDY
|
||||
*/
|
||||
public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
||||
public WhisperFullParams.ByValue getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
||||
Pointer pointer;
|
||||
|
||||
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
|
||||
@ -104,7 +102,7 @@ public class WhisperCpp implements AutoCloseable {
|
||||
pointer = beamParamsPointer;
|
||||
}
|
||||
|
||||
WhisperFullParams params = new WhisperFullParams(pointer);
|
||||
WhisperFullParams.ByValue params = new WhisperFullParams.ByValue(pointer);
|
||||
params.read();
|
||||
return params;
|
||||
}
|
||||
@ -138,15 +136,21 @@ public class WhisperCpp implements AutoCloseable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*/
|
||||
public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||
public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
|
||||
if (ctx == null) {
|
||||
throw new IllegalStateException("Model not initialised");
|
||||
}
|
||||
|
||||
/*
|
||||
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
|
||||
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
|
||||
valueParams.read();
|
||||
*/
|
||||
|
||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||
throw new IOException("Failed to process audio");
|
||||
}
|
||||
@ -163,12 +167,17 @@ public class WhisperCpp implements AutoCloseable {
|
||||
|
||||
return str.toString().trim();
|
||||
}
|
||||
|
||||
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||
if (ctx == null) {
|
||||
throw new IllegalStateException("Model not initialised");
|
||||
}
|
||||
|
||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
|
||||
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
|
||||
valueParams.read();
|
||||
|
||||
if (lib.whisper_full(ctx, valueParams, audioData, audioData.length) != 0) {
|
||||
throw new IOException("Failed to process audio");
|
||||
}
|
||||
|
||||
|
@ -38,7 +38,7 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
* @param params Pointer to whisper_context_params
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
|
||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams.ByValue params);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a buffer.
|
||||
@ -180,12 +180,12 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
/**
|
||||
* @return the id of the specified language, returns -1 if not found.
|
||||
* Examples:
|
||||
* "de" -> 2
|
||||
* "german" -> 2
|
||||
* "de" -> 2
|
||||
* "german" -> 2
|
||||
*/
|
||||
int whisper_lang_id(String lang);
|
||||
|
||||
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
|
||||
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
|
||||
String whisper_lang_str(int id);
|
||||
|
||||
/**
|
||||
@ -268,20 +268,21 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
void whisper_free_params(Pointer params);
|
||||
|
||||
/**
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*/
|
||||
int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
int whisper_full(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples);
|
||||
|
||||
int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
public int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams.ByValue params, float[] samples, int n_samples);
|
||||
//int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
|
||||
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
||||
// Result is stored in the default state of the context
|
||||
// Not thread safe if executed in parallel on the same context.
|
||||
// It seems this approach can offer some speedup in some cases.
|
||||
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||
int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
|
||||
int whisper_full_parallel(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples, int n_processors);
|
||||
|
||||
/**
|
||||
* Number of generated text segments.
|
||||
|
@ -0,0 +1,17 @@
|
||||
package io.github.ggerganov.whispercpp.callbacks;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
|
||||
/**
|
||||
* Callback for aborting GGML computation
|
||||
* Maps to the C typedef: bool (*ggml_abort_callback)(void * data)
|
||||
*/
|
||||
public interface GgmlAbortCallback extends Callback {
|
||||
/**
|
||||
* Return true to abort the computation, false to continue
|
||||
*
|
||||
* @param data User data passed to the callback
|
||||
* @return true to abort, false to continue
|
||||
*/
|
||||
boolean invoke(com.sun.jna.Pointer data);
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
import com.sun.jna.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperAhead extends Structure {
|
||||
|
||||
public int n_text_layer;
|
||||
|
||||
public int n_head;
|
||||
|
||||
public WhisperAhead() {
|
||||
super();
|
||||
}
|
||||
|
||||
public WhisperAhead(int textLayer, int head) {
|
||||
super();
|
||||
this.n_text_layer = textLayer;
|
||||
this.n_head = head;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("n_text_layer", "n_head");
|
||||
}
|
||||
|
||||
public static class ByReference extends WhisperAhead implements Structure.ByReference {}
|
||||
|
||||
public static class ByValue extends WhisperAhead implements Structure.ByValue {}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
import com.sun.jna.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperAheads extends Structure {
|
||||
public NativeLong n_heads;
|
||||
|
||||
public Pointer heads;
|
||||
|
||||
public WhisperAheads() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create alignment heads from an array of WhisperAhead objects
|
||||
*/
|
||||
public void setHeads(WhisperAhead[] aheadsArray) {
|
||||
this.n_heads = new NativeLong(aheadsArray.length);
|
||||
|
||||
int structSize = aheadsArray[0].size();
|
||||
Memory mem = new Memory(structSize * aheadsArray.length);
|
||||
|
||||
for (int i = 0; i < aheadsArray.length; i++) {
|
||||
aheadsArray[i].write();
|
||||
byte[] buffer = aheadsArray[i].getPointer().getByteArray(0, structSize);
|
||||
mem.write(i * structSize, buffer, 0, buffer.length);
|
||||
}
|
||||
|
||||
this.heads = mem;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("n_heads", "heads");
|
||||
}
|
||||
|
||||
public static class ByReference extends WhisperAheads implements Structure.ByReference {}
|
||||
|
||||
public static class ByValue extends WhisperAheads implements Structure.ByValue {}
|
||||
}
|
@ -1,7 +1,5 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
@ -11,21 +9,73 @@ import java.util.List;
|
||||
* whisper_context_default_params()
|
||||
*/
|
||||
public class WhisperContextParams extends Structure {
|
||||
|
||||
public WhisperContextParams(Pointer p) {
|
||||
super(p);
|
||||
}
|
||||
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public WhisperContextParams() {
|
||||
super();
|
||||
}
|
||||
|
||||
/** Use GPU for inference (default = true) */
|
||||
public CBool use_gpu;
|
||||
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
/** Use flash attention (default = false) */
|
||||
public CBool flash_attn;
|
||||
|
||||
/** CUDA device to use (default = 0) */
|
||||
public int gpu_device;
|
||||
|
||||
/** [EXPERIMENTAL] Enable token-level timestamps with DTW (default = false) */
|
||||
public CBool dtw_token_timestamps;
|
||||
|
||||
/** [EXPERIMENTAL] Alignment heads preset for DTW */
|
||||
public int dtw_aheads_preset;
|
||||
|
||||
/** Number of top layers to use for DTW when using WHISPER_AHEADS_N_TOP_MOST preset */
|
||||
public int dtw_n_top;
|
||||
|
||||
public WhisperAheads.ByValue dtw_aheads;
|
||||
|
||||
/** DTW memory size (internal use) */
|
||||
public NativeLong dtw_mem_size;
|
||||
|
||||
/** Use GPU for inference */
|
||||
public void useGpu(boolean enable) {
|
||||
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Use flash attention */
|
||||
public void useFlashAttn(boolean enable) {
|
||||
flash_attn = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Enable DTW token-level timestamps */
|
||||
public void enableDtwTokenTimestamps(boolean enable) {
|
||||
dtw_token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Set DTW alignment heads preset */
|
||||
public void setDtwAheadsPreset(int preset) {
|
||||
dtw_aheads_preset = preset;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("use_gpu");
|
||||
return Arrays.asList(
|
||||
"use_gpu",
|
||||
"flash_attn",
|
||||
"gpu_device",
|
||||
"dtw_token_timestamps",
|
||||
"dtw_aheads_preset",
|
||||
"dtw_n_top",
|
||||
"dtw_aheads",
|
||||
"dtw_mem_size"
|
||||
);
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperContextParams implements Structure.ByValue {
|
||||
public ByValue() { super(); }
|
||||
public ByValue(Pointer p) { super(p); }
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.GgmlAbortCallback;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
@ -16,10 +17,12 @@ import java.util.List;
|
||||
*/
|
||||
public class WhisperFullParams extends Structure {
|
||||
|
||||
public WhisperFullParams() {
|
||||
super();
|
||||
}
|
||||
|
||||
public WhisperFullParams(Pointer p) {
|
||||
super(p);
|
||||
// super(p, ALIGN_MSVC);
|
||||
// super(p, ALIGN_GNUC);
|
||||
}
|
||||
|
||||
/** Sampling strategy for whisper_full() function. */
|
||||
@ -69,10 +72,10 @@ public class WhisperFullParams extends Structure {
|
||||
single_segment = single ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
public CBool print_special;
|
||||
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
public void printSpecial(boolean enable) {
|
||||
print_special = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
@ -129,6 +132,14 @@ public class WhisperFullParams extends Structure {
|
||||
/** Maximum tokens per segment (0, default = no limit) */
|
||||
public int max_tokens;
|
||||
|
||||
/** [EXPERIMENTAL] Enable debug mode for extra info */
|
||||
public CBool debug_mode;
|
||||
|
||||
/** Enable debug mode */
|
||||
public void enableDebugMode(boolean enable) {
|
||||
debug_mode = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Overwrite the audio context size (0 = use default). */
|
||||
public int audio_ctx;
|
||||
|
||||
@ -274,6 +285,16 @@ public class WhisperFullParams extends Structure {
|
||||
*/
|
||||
public Pointer encoder_begin_callback_user_data;
|
||||
|
||||
/** Callback used to abort GGML computation */
|
||||
public Pointer abort_callback;
|
||||
|
||||
/** User data for the abort_callback */
|
||||
public Pointer abort_callback_user_data;
|
||||
|
||||
public void setAbortCallback(GgmlAbortCallback callback) {
|
||||
abort_callback = CallbackReference.getFunctionPointer(callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback by each decoder to filter obtained logits.
|
||||
* WhisperLogitsFilterCallback
|
||||
@ -310,17 +331,28 @@ public class WhisperFullParams extends Structure {
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
|
||||
"no_context", "single_segment", "no_timestamps",
|
||||
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
|
||||
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
|
||||
"tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
||||
"suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
|
||||
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
|
||||
"new_segment_callback", "new_segment_callback_user_data",
|
||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
|
||||
"offset_ms", "duration_ms", "translate", "no_context",
|
||||
"no_timestamps", "single_segment", "print_special",
|
||||
"print_progress", "print_realtime", "print_timestamps",
|
||||
"token_timestamps", "thold_pt", "thold_ptsum", "max_len",
|
||||
"split_on_word", "max_tokens", "debug_mode", "audio_ctx",
|
||||
"tdrz_enable", "suppress_regex", "initial_prompt",
|
||||
"prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
||||
"suppress_blank", "suppress_nst", "temperature",
|
||||
"max_initial_ts", "length_penalty", "temperature_inc",
|
||||
"entropy_thold", "logprob_thold", "no_speech_thold", "greedy",
|
||||
"beam_search", "new_segment_callback", "new_segment_callback_user_data",
|
||||
"progress_callback", "progress_callback_user_data",
|
||||
"encoder_begin_callback", "encoder_begin_callback_user_data",
|
||||
"abort_callback", "abort_callback_user_data",
|
||||
"logits_filter_callback", "logits_filter_callback_user_data",
|
||||
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperFullParams implements Structure.ByValue {
|
||||
public ByValue() { super(); }
|
||||
public ByValue(Pointer p) { super(p); }
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ class WhisperCppTest {
|
||||
float[] floats = new float[b.length / 2];
|
||||
|
||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||
params.print_progress = CBool.FALSE;
|
||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||
|
@ -33,6 +33,9 @@ mkdir build-em && cd build-em
|
||||
emcmake cmake .. && make -j
|
||||
|
||||
# run test
|
||||
node ../tests/test-whisper.js
|
||||
|
||||
# For Node.js versions prior to v16.4.0, experimental features need to be enabled:
|
||||
node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
||||
|
||||
# publish npm package
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "whisper.cpp",
|
||||
"version": "1.7.4",
|
||||
"version": "1.7.5",
|
||||
"description": "Whisper speech recognition",
|
||||
"main": "whisper.js",
|
||||
"scripts": {
|
||||
|
@ -24,14 +24,15 @@ require "whisper"
|
||||
|
||||
whisper = Whisper::Context.new("base")
|
||||
|
||||
params = Whisper::Params.new
|
||||
params.language = "en"
|
||||
params.offset = 10_000
|
||||
params.duration = 60_000
|
||||
params.max_text_tokens = 300
|
||||
params.translate = true
|
||||
params.print_timestamps = false
|
||||
params.initial_prompt = "Initial prompt here."
|
||||
params = Whisper::Params.new(
|
||||
language: "en",
|
||||
offset: 10_000,
|
||||
duration: 60_000,
|
||||
max_text_tokens: 300,
|
||||
translate: true,
|
||||
print_timestamps: false,
|
||||
initial_prompt: "Initial prompt here."
|
||||
)
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
||||
puts whole_text
|
||||
@ -113,18 +114,18 @@ def format_time(time_ms)
|
||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
||||
end
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params)
|
||||
|
||||
whisper.each_segment.with_index do |segment, index|
|
||||
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
||||
nth: index + 1,
|
||||
st: format_time(segment.start_time),
|
||||
ed: format_time(segment.end_time),
|
||||
text: segment.text
|
||||
}
|
||||
line << " (speaker turned)" if segment.speaker_next_turn?
|
||||
puts line
|
||||
end
|
||||
whisper
|
||||
.transcribe("path/to/audio.wav", params)
|
||||
.each_segment.with_index do |segment, index|
|
||||
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
||||
nth: index + 1,
|
||||
st: format_time(segment.start_time),
|
||||
ed: format_time(segment.end_time),
|
||||
text: segment.text
|
||||
}
|
||||
line << " (speaker turned)" if segment.speaker_next_turn?
|
||||
puts line
|
||||
end
|
||||
|
||||
```
|
||||
|
||||
@ -215,10 +216,11 @@ reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :
|
||||
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
|
||||
|
||||
whisper = Whisper::Context.new("base")
|
||||
whisper.full(Whisper::Params.new, samples)
|
||||
whisper.each_segment do |segment|
|
||||
puts segment.text
|
||||
end
|
||||
whisper
|
||||
.full(Whisper::Params.new, samples)
|
||||
.each_segment do |segment|
|
||||
puts segment.text
|
||||
end
|
||||
```
|
||||
|
||||
The second argument `samples` may be an array, an object with `length` and `each` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
|
||||
|
@ -18,9 +18,11 @@ EXTSOURCES.each do |src|
|
||||
end
|
||||
|
||||
CLEAN.include SOURCES
|
||||
CLEAN.include FileList["ext/*.o", "ext/*.metal", "ext/whisper.{so,bundle,dll}"]
|
||||
CLEAN.include FileList["ext/**/*.o", "ext/**/*.metal", "ext/**/*.tmp", "ext/whisper.{so,bundle,dll}"]
|
||||
|
||||
task build: ["ext/Makefile", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp", "whispercpp.gemspec"]
|
||||
SRC = FileList["ext/*.{c,cpp,h}"]
|
||||
|
||||
task build: SOURCES
|
||||
|
||||
directory "pkg"
|
||||
CLOBBER.include "pkg"
|
||||
@ -29,14 +31,14 @@ LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
||||
SO_FILE = File.join("ext", LIB_NAME)
|
||||
LIB_FILE = File.join("lib", LIB_NAME)
|
||||
|
||||
file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
|
||||
Dir.chdir "ext" do
|
||||
file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
|
||||
chdir "ext" do
|
||||
ruby "extconf.rb"
|
||||
end
|
||||
end
|
||||
|
||||
file SO_FILE => "ext/Makefile" do |t|
|
||||
Dir.chdir "ext" do
|
||||
chdir "ext" do
|
||||
sh "make"
|
||||
end
|
||||
end
|
||||
@ -54,7 +56,7 @@ end
|
||||
|
||||
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
|
||||
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
|
||||
Dir.chdir "tests/jfk_reader" do
|
||||
chdir "tests/jfk_reader" do
|
||||
ruby "extconf.rb"
|
||||
sh "make"
|
||||
end
|
||||
|
12
bindings/ruby/ext/.gitignore
vendored
12
bindings/ruby/ext/.gitignore
vendored
@ -4,10 +4,8 @@ whisper.bundle
|
||||
whisper.dll
|
||||
scripts/get-flags.mk
|
||||
*.o
|
||||
*.c
|
||||
*.cpp
|
||||
*.h
|
||||
*.m
|
||||
*.metal
|
||||
!ruby_whisper.cpp
|
||||
!ruby_whisper.h
|
||||
/*/**/*.c
|
||||
/*/**/*.cpp
|
||||
/*/**/*.h
|
||||
/*/**/*.m
|
||||
/*/**/*.metal
|
||||
|
@ -1,5 +1,7 @@
|
||||
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
|
||||
ggml/src/ggml-cpu/ggml-cpu.cpp \
|
||||
ggml/src/ggml-cpu/unary-ops.cpp \
|
||||
ggml/src/ggml-cpu/binary-ops.cpp \
|
||||
ggml/include/ggml-backend.h \
|
||||
ggml/include/ggml.h \
|
||||
ggml/include/ggml-alloc.h \
|
||||
|
@ -35,7 +35,7 @@ if $GGML_METAL
|
||||
$GGML_METAL_EMBED_LIBRARY = true
|
||||
end
|
||||
|
||||
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
|
||||
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
|
||||
$MK_CFLAGS = '-std=c11 -fPIC'
|
||||
$MK_CXXFLAGS = '-std=c++17 -fPIC'
|
||||
$MK_NVCCFLAGS = '-std=c++17'
|
||||
@ -168,13 +168,24 @@ $OBJ_GGML <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
|
||||
'ggml/src/ggml-cpu/ggml-cpu-traits.o' <<
|
||||
'ggml/src/ggml-cpu/unary-ops.o' <<
|
||||
'ggml/src/ggml-cpu/binary-ops.o'
|
||||
|
||||
$OBJ_WHISPER <<
|
||||
'src/whisper.o'
|
||||
'src/whisper.o' <<
|
||||
'examples/common.o' <<
|
||||
'examples/common-whisper.o'
|
||||
|
||||
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
||||
$objs << "ruby_whisper.o"
|
||||
$objs <<
|
||||
"ruby_whisper.o" <<
|
||||
"ruby_whisper_context.o" <<
|
||||
"ruby_whisper_transcribe.o" <<
|
||||
"ruby_whisper_params.o" <<
|
||||
"ruby_whisper_error.o" <<
|
||||
"ruby_whisper_segment.o" <<
|
||||
"ruby_whisper_model.o"
|
||||
|
||||
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
||||
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
||||
|
164
bindings/ruby/ext/ruby_whisper.c
Normal file
164
bindings/ruby/ext/ruby_whisper.c
Normal file
@ -0,0 +1,164 @@
|
||||
#include <ruby.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
VALUE mWhisper;
|
||||
VALUE cContext;
|
||||
VALUE cParams;
|
||||
VALUE eError;
|
||||
|
||||
VALUE cSegment;
|
||||
VALUE cModel;
|
||||
|
||||
ID id_to_s;
|
||||
ID id_call;
|
||||
ID id___method__;
|
||||
ID id_to_enum;
|
||||
ID id_length;
|
||||
ID id_next;
|
||||
ID id_new;
|
||||
ID id_to_path;
|
||||
ID id_URI;
|
||||
ID id_pre_converted_models;
|
||||
|
||||
static bool is_log_callback_finalized = false;
|
||||
|
||||
// High level API
|
||||
extern VALUE ruby_whisper_segment_allocate(VALUE klass);
|
||||
|
||||
extern void init_ruby_whisper_context(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_params(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_error(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
|
||||
extern void init_ruby_whisper_model(VALUE *mWhisper);
|
||||
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_max_id -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
|
||||
return INT2NUM(whisper_lang_max_id());
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_id(lang_name) -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
|
||||
const char * lang_str = StringValueCStr(lang);
|
||||
const int id = whisper_lang_id(lang_str);
|
||||
if (-1 == id) {
|
||||
rb_raise(rb_eArgError, "language not found: %s", lang_str);
|
||||
}
|
||||
return INT2NUM(id);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_str(lang_id) -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
|
||||
const int lang_id = NUM2INT(id);
|
||||
const char * str = whisper_lang_str(lang_id);
|
||||
if (NULL == str) {
|
||||
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
||||
}
|
||||
return rb_str_new2(str);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_str(lang_id) -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
||||
const int lang_id = NUM2INT(id);
|
||||
const char * str_full = whisper_lang_str_full(lang_id);
|
||||
if (NULL == str_full) {
|
||||
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
||||
}
|
||||
return rb_str_new2(str_full);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
||||
is_log_callback_finalized = true;
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static void
|
||||
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
|
||||
if (is_log_callback_finalized) {
|
||||
return;
|
||||
}
|
||||
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
|
||||
VALUE udata = rb_iv_get(mWhisper, "user_data");
|
||||
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
||||
*/
|
||||
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
||||
VALUE old_callback = rb_iv_get(self, "log_callback");
|
||||
if (!NIL_P(old_callback)) {
|
||||
rb_undefine_finalizer(old_callback);
|
||||
}
|
||||
|
||||
rb_iv_set(self, "log_callback", log_callback);
|
||||
rb_iv_set(self, "user_data", user_data);
|
||||
|
||||
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
||||
rb_define_finalizer(log_callback, finalize_log_callback);
|
||||
|
||||
whisper_log_set(ruby_whisper_log_callback, NULL);
|
||||
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
|
||||
rb_gc_mark(rwm->context);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
||||
ruby_whisper_model *rwm;
|
||||
rwm = ALLOC(ruby_whisper_model);
|
||||
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
|
||||
}
|
||||
|
||||
void Init_whisper() {
|
||||
id_to_s = rb_intern("to_s");
|
||||
id_call = rb_intern("call");
|
||||
id___method__ = rb_intern("__method__");
|
||||
id_to_enum = rb_intern("to_enum");
|
||||
id_length = rb_intern("length");
|
||||
id_next = rb_intern("next");
|
||||
id_new = rb_intern("new");
|
||||
id_to_path = rb_intern("to_path");
|
||||
id_URI = rb_intern("URI");
|
||||
id_pre_converted_models = rb_intern("pre_converted_models");
|
||||
|
||||
mWhisper = rb_define_module("Whisper");
|
||||
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
||||
|
||||
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
||||
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
||||
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
||||
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
||||
|
||||
init_ruby_whisper_context(&mWhisper);
|
||||
init_ruby_whisper_params(&mWhisper);
|
||||
init_ruby_whisper_error(&mWhisper);
|
||||
init_ruby_whisper_segment(&mWhisper, &cContext);
|
||||
init_ruby_whisper_model(&mWhisper);
|
||||
|
||||
rb_require("whisper/model/uri");
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -22,4 +22,13 @@ typedef struct {
|
||||
ruby_whisper_callback_container *abort_callback_container;
|
||||
} ruby_whisper_params;
|
||||
|
||||
typedef struct {
|
||||
VALUE context;
|
||||
int index;
|
||||
} ruby_whisper_segment;
|
||||
|
||||
typedef struct {
|
||||
VALUE context;
|
||||
} ruby_whisper_model;
|
||||
|
||||
#endif
|
||||
|
613
bindings/ruby/ext/ruby_whisper_context.c
Normal file
613
bindings/ruby/ext/ruby_whisper_context.c
Normal file
@ -0,0 +1,613 @@
|
||||
#include <ruby.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
extern ID id_to_s;
|
||||
extern ID id___method__;
|
||||
extern ID id_to_enum;
|
||||
extern ID id_length;
|
||||
extern ID id_next;
|
||||
extern ID id_new;
|
||||
extern ID id_to_path;
|
||||
extern ID id_URI;
|
||||
extern ID id_pre_converted_models;
|
||||
|
||||
extern VALUE cContext;
|
||||
extern VALUE eError;
|
||||
extern VALUE cModel;
|
||||
|
||||
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
||||
extern VALUE rb_whisper_model_initialize(VALUE context);
|
||||
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
|
||||
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
|
||||
|
||||
static void
|
||||
ruby_whisper_free(ruby_whisper *rw)
|
||||
{
|
||||
if (rw->context) {
|
||||
whisper_free(rw->context);
|
||||
rw->context = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rb_whisper_mark(ruby_whisper *rw)
|
||||
{
|
||||
// call rb_gc_mark on any ruby references in rw
|
||||
}
|
||||
|
||||
void
|
||||
rb_whisper_free(ruby_whisper *rw)
|
||||
{
|
||||
ruby_whisper_free(rw);
|
||||
free(rw);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_allocate(VALUE klass)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
rw = ALLOC(ruby_whisper);
|
||||
rw->context = NULL;
|
||||
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* new("base.en") -> Whisper::Context
|
||||
* new("path/to/model.bin") -> Whisper::Context
|
||||
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_initialize(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
VALUE whisper_model_file_path;
|
||||
|
||||
// TODO: we can support init from buffer here too maybe another ruby object to expose
|
||||
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
|
||||
VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
|
||||
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, whisper_model_file_path);
|
||||
if (!NIL_P(pre_converted_model)) {
|
||||
whisper_model_file_path = pre_converted_model;
|
||||
}
|
||||
if (TYPE(whisper_model_file_path) == T_STRING) {
|
||||
const char * whisper_model_file_path_str = StringValueCStr(whisper_model_file_path);
|
||||
if (strncmp("http://", whisper_model_file_path_str, 7) == 0 || strncmp("https://", whisper_model_file_path_str, 8) == 0) {
|
||||
VALUE uri_class = rb_const_get(cModel, id_URI);
|
||||
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
|
||||
}
|
||||
}
|
||||
if (rb_obj_is_kind_of(whisper_model_file_path, rb_path2class("URI::HTTP"))) {
|
||||
VALUE uri_class = rb_const_get(cModel, id_URI);
|
||||
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
|
||||
}
|
||||
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
|
||||
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
|
||||
}
|
||||
if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
||||
}
|
||||
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
||||
if (rw->context == NULL) {
|
||||
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_vocab -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_vocab(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_ctx -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_state -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_state(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_head -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_head(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_layer -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_ctx -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_state -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_state(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_head -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_head(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_layer -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_mels -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_mels(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_ftype -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_ftype(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_ftype(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_type -> String
|
||||
*/
|
||||
VALUE ruby_whisper_model_type(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*
|
||||
* call-seq:
|
||||
* full(params, samples, n_samples) -> nil
|
||||
* full(params, samples) -> nil
|
||||
*
|
||||
* The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
|
||||
*/
|
||||
VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
if (argc < 2 || argc > 3) {
|
||||
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
||||
}
|
||||
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
VALUE params = argv[0];
|
||||
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||
VALUE samples = argv[1];
|
||||
int n_samples;
|
||||
rb_memory_view_t view;
|
||||
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
||||
if (argc == 3) {
|
||||
n_samples = NUM2INT(argv[2]);
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
if (RARRAY_LEN(samples) < n_samples) {
|
||||
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
||||
}
|
||||
}
|
||||
// Should check when samples.respond_to?(:length)?
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
n_samples = RARRAY_LEN(samples);
|
||||
} else if (memory_view_available_p) {
|
||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||
view.obj = Qnil;
|
||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||
}
|
||||
n_samples = view.byte_size / view.item_size;
|
||||
} else if (rb_respond_to(samples, id_length)) {
|
||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||
} else {
|
||||
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
||||
}
|
||||
}
|
||||
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
||||
if (memory_view_available_p) {
|
||||
c_samples = (float *)view.data;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
||||
}
|
||||
} else {
|
||||
// TODO: use rb_block_call
|
||||
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
// TODO: check if iter is exhausted and raise ArgumentError appropriately
|
||||
VALUE sample = rb_funcall(iter, id_next, 0);
|
||||
c_samples[i] = RFLOAT_VALUE(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
register_callbacks(rwp, &self);
|
||||
const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
|
||||
if (0 == result) {
|
||||
return self;
|
||||
} else {
|
||||
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
||||
* Result is stored in the default state of the context
|
||||
* Not thread safe if executed in parallel on the same context.
|
||||
* It seems this approach can offer some speedup in some cases.
|
||||
* However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||
*
|
||||
* call-seq:
|
||||
* full_parallel(params, samples) -> nil
|
||||
* full_parallel(params, samples, n_samples) -> nil
|
||||
* full_parallel(params, samples, n_samples, n_processors) -> nil
|
||||
* full_parallel(params, samples, nil, n_processors) -> nil
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
|
||||
{
|
||||
if (argc < 2 || argc > 4) {
|
||||
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
||||
}
|
||||
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
VALUE params = argv[0];
|
||||
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||
VALUE samples = argv[1];
|
||||
int n_samples;
|
||||
int n_processors;
|
||||
rb_memory_view_t view;
|
||||
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
||||
switch (argc) {
|
||||
case 2:
|
||||
n_processors = 1;
|
||||
break;
|
||||
case 3:
|
||||
n_processors = 1;
|
||||
break;
|
||||
case 4:
|
||||
n_processors = NUM2INT(argv[3]);
|
||||
break;
|
||||
}
|
||||
if (argc >= 3 && !NIL_P(argv[2])) {
|
||||
n_samples = NUM2INT(argv[2]);
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
if (RARRAY_LEN(samples) < n_samples) {
|
||||
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
||||
}
|
||||
}
|
||||
// Should check when samples.respond_to?(:length)?
|
||||
} else if (memory_view_available_p) {
|
||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||
view.obj = Qnil;
|
||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||
}
|
||||
n_samples = view.byte_size / view.item_size;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
n_samples = RARRAY_LEN(samples);
|
||||
} else if (rb_respond_to(samples, id_length)) {
|
||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||
} else {
|
||||
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
||||
}
|
||||
}
|
||||
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
||||
if (memory_view_available_p) {
|
||||
c_samples = (float *)view.data;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
||||
}
|
||||
} else {
|
||||
// FIXME: use rb_block_call
|
||||
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
// TODO: check if iter is exhausted and raise ArgumentError
|
||||
VALUE sample = rb_funcall(iter, id_next, 0);
|
||||
c_samples[i] = RFLOAT_VALUE(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
register_callbacks(rwp, &self);
|
||||
const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
|
||||
if (0 == result) {
|
||||
return self;
|
||||
} else {
|
||||
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Number of segments.
|
||||
*
|
||||
* call-seq:
|
||||
* full_n_segments -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_n_segments(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_full_n_segments(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
|
||||
*
|
||||
* call-seq:
|
||||
* full_lang_id -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_lang_id(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_full_lang_id(rw->context));
|
||||
}
|
||||
|
||||
static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment)
|
||||
{
|
||||
const int c_i_segment = NUM2INT(i_segment);
|
||||
if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) {
|
||||
rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment);
|
||||
}
|
||||
return c_i_segment;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
||||
*
|
||||
* full_get_segment_t0(3) # => 1668 (16680 ms)
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_t0(segment_index) -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
||||
return INT2NUM(t0);
|
||||
}
|
||||
|
||||
/*
|
||||
* End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
||||
*
|
||||
* full_get_segment_t1(3) # => 1668 (16680 ms)
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_t1(segment_index) -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
||||
return INT2NUM(t1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Whether the next segment indexed by +segment_index+ is predicated as a speaker turn.
|
||||
*
|
||||
* full_get_segment_speacker_turn_next(3) # => true
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_speacker_turn_next(segment_index) -> bool
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment);
|
||||
return speaker_turn_next ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
/*
|
||||
* Text of a segment indexed by +segment_index+.
|
||||
*
|
||||
* full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_text(segment_index) -> String
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const char * text = whisper_full_get_segment_text(rw->context, c_i_segment);
|
||||
return rb_str_new2(text);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* full_get_segment_no_speech_prob(segment_index) -> Float
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const float no_speech_prob = whisper_full_get_segment_no_speech_prob(rw->context, c_i_segment);
|
||||
return DBL2NUM(no_speech_prob);
|
||||
}
|
||||
|
||||
// High level API
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
||||
{
|
||||
return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
|
||||
}
|
||||
|
||||
/*
|
||||
* Yields each Whisper::Segment:
|
||||
*
|
||||
* whisper.transcribe("path/to/audio.wav", params)
|
||||
* whisper.each_segment do |segment|
|
||||
* puts segment.text
|
||||
* end
|
||||
*
|
||||
* Returns an Enumerator if no block given:
|
||||
*
|
||||
* whisper.transcribe("path/to/audio.wav", params)
|
||||
* enum = whisper.each_segment
|
||||
* enum.to_a # => [#<Whisper::Segment>, ...]
|
||||
*
|
||||
* call-seq:
|
||||
* each_segment {|segment| ... }
|
||||
* each_segment -> Enumerator
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_each_segment(VALUE self)
|
||||
{
|
||||
if (!rb_block_given_p()) {
|
||||
const VALUE method_name = rb_funcall(self, id___method__, 0);
|
||||
return rb_funcall(self, id_to_enum, 1, method_name);
|
||||
}
|
||||
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
|
||||
const int n_segments = whisper_full_n_segments(rw->context);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
rb_yield(rb_whisper_segment_initialize(self, i));
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model -> Whisper::Model
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_get_model(VALUE self)
|
||||
{
|
||||
return rb_whisper_model_initialize(self);
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_context(VALUE *mWhisper)
|
||||
{
|
||||
cContext = rb_define_class_under(*mWhisper, "Context", rb_cObject);
|
||||
|
||||
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
||||
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
||||
|
||||
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
||||
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
||||
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
||||
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
||||
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
||||
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
||||
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
||||
rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1);
|
||||
rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1);
|
||||
rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1);
|
||||
rb_define_method(cContext, "full_get_segment_no_speech_prob", ruby_whisper_full_get_segment_no_speech_prob, 1);
|
||||
rb_define_method(cContext, "full", ruby_whisper_full, -1);
|
||||
rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1);
|
||||
|
||||
// High leve
|
||||
rb_define_method(cContext, "full_get_segment", ruby_whisper_full_get_segment, 1);
|
||||
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
|
||||
|
||||
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
||||
}
|
52
bindings/ruby/ext/ruby_whisper_error.c
Normal file
52
bindings/ruby/ext/ruby_whisper_error.c
Normal file
@ -0,0 +1,52 @@
|
||||
#include <ruby.h>
|
||||
|
||||
extern VALUE eError;
|
||||
|
||||
VALUE ruby_whisper_error_initialize(VALUE self, VALUE code)
|
||||
{
|
||||
const int c_code = NUM2INT(code);
|
||||
const char *raw_message;
|
||||
switch (c_code) {
|
||||
case -2:
|
||||
raw_message = "failed to compute log mel spectrogram";
|
||||
break;
|
||||
case -3:
|
||||
raw_message = "failed to auto-detect language";
|
||||
break;
|
||||
case -4:
|
||||
raw_message = "too many decoders requested";
|
||||
break;
|
||||
case -5:
|
||||
raw_message = "audio_ctx is larger than the maximum allowed";
|
||||
break;
|
||||
case -6:
|
||||
raw_message = "failed to encode";
|
||||
break;
|
||||
case -7:
|
||||
raw_message = "whisper_kv_cache_init() failed for self-attention cache";
|
||||
break;
|
||||
case -8:
|
||||
raw_message = "failed to decode";
|
||||
break;
|
||||
case -9:
|
||||
raw_message = "failed to decode";
|
||||
break;
|
||||
default:
|
||||
raw_message = "unknown error";
|
||||
break;
|
||||
}
|
||||
const VALUE message = rb_str_new2(raw_message);
|
||||
rb_call_super(1, &message);
|
||||
rb_iv_set(self, "@code", code);
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_error(VALUE *mWhisper)
|
||||
{
|
||||
eError = rb_define_class_under(*mWhisper, "Error", rb_eStandardError);
|
||||
|
||||
rb_define_attr(eError, "code", true, false);
|
||||
rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1);
|
||||
}
|
210
bindings/ruby/ext/ruby_whisper_model.c
Normal file
210
bindings/ruby/ext/ruby_whisper_model.c
Normal file
@ -0,0 +1,210 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
extern VALUE cModel;
|
||||
|
||||
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
|
||||
rb_gc_mark(rwm->context);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
||||
ruby_whisper_model *rwm;
|
||||
rwm = ALLOC(ruby_whisper_model);
|
||||
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
|
||||
}
|
||||
|
||||
VALUE rb_whisper_model_initialize(VALUE context) {
|
||||
ruby_whisper_model *rwm;
|
||||
const VALUE model = ruby_whisper_model_allocate(cModel);
|
||||
Data_Get_Struct(model, ruby_whisper_model, rwm);
|
||||
rwm->context = context;
|
||||
return model;
|
||||
};
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_vocab -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_vocab(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_ctx -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_state -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_state(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_head -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_head(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_layer -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_ctx -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_state -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_state(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_head -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_head(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_layer -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_mels -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_mels(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ftype -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_ftype(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_ftype(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* type -> String
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_type(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_model(VALUE *mWhisper)
|
||||
{
|
||||
cModel = rb_define_class_under(*mWhisper, "Model", rb_cObject);
|
||||
|
||||
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
||||
rb_define_method(cModel, "n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||
rb_define_method(cModel, "n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||
rb_define_method(cModel, "n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||
rb_define_method(cModel, "n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||
rb_define_method(cModel, "n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||
rb_define_method(cModel, "n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||
rb_define_method(cModel, "n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||
rb_define_method(cModel, "n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||
rb_define_method(cModel, "n_mels", ruby_whisper_model_n_mels, 0);
|
||||
rb_define_method(cModel, "ftype", ruby_whisper_model_ftype, 0);
|
||||
rb_define_method(cModel, "type", ruby_whisper_model_type, 0);
|
||||
}
|
1077
bindings/ruby/ext/ruby_whisper_params.c
Normal file
1077
bindings/ruby/ext/ruby_whisper_params.c
Normal file
File diff suppressed because it is too large
Load Diff
123
bindings/ruby/ext/ruby_whisper_segment.c
Normal file
123
bindings/ruby/ext/ruby_whisper_segment.c
Normal file
@ -0,0 +1,123 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
extern VALUE cSegment;
|
||||
|
||||
static void
|
||||
rb_whisper_segment_mark(ruby_whisper_segment *rws)
|
||||
{
|
||||
rb_gc_mark(rws->context);
|
||||
}
|
||||
|
||||
VALUE
|
||||
ruby_whisper_segment_allocate(VALUE klass)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
rws = ALLOC(ruby_whisper_segment);
|
||||
return Data_Wrap_Struct(klass, rb_whisper_segment_mark, RUBY_DEFAULT_FREE, rws);
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_whisper_segment_initialize(VALUE context, int index)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
||||
Data_Get_Struct(segment, ruby_whisper_segment, rws);
|
||||
rws->context = context;
|
||||
rws->index = index;
|
||||
return segment;
|
||||
};
|
||||
|
||||
/*
|
||||
* Start time in milliseconds.
|
||||
*
|
||||
* call-seq:
|
||||
* start_time -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_start_time(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
||||
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
||||
return INT2NUM(t0 * 10);
|
||||
}
|
||||
|
||||
/*
|
||||
* End time in milliseconds.
|
||||
*
|
||||
* call-seq:
|
||||
* end_time -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_end_time(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
||||
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
||||
return INT2NUM(t1 * 10);
|
||||
}
|
||||
|
||||
/*
|
||||
* Whether the next segment is predicted as a speaker turn.
|
||||
*
|
||||
* call-seq:
|
||||
* speaker_turn_next? -> bool
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_speaker_turn_next(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* text -> String
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_text(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
|
||||
return rb_str_new2(text);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* no_speech_prob -> Float
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_no_speech_prob(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||
return DBL2NUM(whisper_full_get_segment_no_speech_prob(rw->context, rws->index));
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cContext)
|
||||
{
|
||||
cSegment = rb_define_class_under(*mWhisper, "Segment", rb_cObject);
|
||||
|
||||
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
|
||||
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
|
||||
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
||||
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
||||
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
||||
rb_define_method(cSegment, "no_speech_prob", ruby_whisper_segment_get_no_speech_prob, 0);
|
||||
}
|
83
bindings/ruby/ext/ruby_whisper_transcribe.cpp
Normal file
83
bindings/ruby/ext/ruby_whisper_transcribe.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
#include "common-whisper.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern ID id_to_s;
|
||||
extern ID id_call;
|
||||
|
||||
extern void
|
||||
register_callbacks(ruby_whisper_params * rwp, VALUE * self);
|
||||
|
||||
/*
|
||||
* transcribe a single file
|
||||
* can emit to a block results
|
||||
*
|
||||
* params = Whisper::Params.new
|
||||
* params.duration = 60_000
|
||||
* whisper.transcribe "path/to/audio.wav", params do |text|
|
||||
* puts text
|
||||
* end
|
||||
*
|
||||
* call-seq:
|
||||
* transcribe(path_to_audio, params) {|text| ...}
|
||||
**/
|
||||
VALUE
|
||||
ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
VALUE wave_file_path, blk, params;
|
||||
|
||||
rb_scan_args(argc, argv, "02&", &wave_file_path, ¶ms, &blk);
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||
|
||||
if (!rb_respond_to(wave_file_path, id_to_s)) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
||||
}
|
||||
|
||||
std::string fname_inp = StringValueCStr(wave_file_path);
|
||||
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
|
||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
||||
return self;
|
||||
}
|
||||
{
|
||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||
|
||||
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||
bool is_aborted = *(bool*)user_data;
|
||||
return !is_aborted;
|
||||
};
|
||||
rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
||||
}
|
||||
|
||||
register_callbacks(rwp, &self);
|
||||
|
||||
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
|
||||
fprintf(stderr, "failed to process audio\n");
|
||||
return self;
|
||||
}
|
||||
const int n_segments = whisper_full_n_segments(rw->context);
|
||||
VALUE output = rb_str_new2("");
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(rw->context, i);
|
||||
output = rb_str_concat(output, rb_str_new2(text));
|
||||
}
|
||||
VALUE idCall = id_call;
|
||||
if (blk != Qnil) {
|
||||
rb_funcall(blk, idCall, 1, output);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@ -65,6 +65,13 @@ module Whisper
|
||||
end
|
||||
end
|
||||
end
|
||||
rescue => err
|
||||
if cache_path.exist?
|
||||
warn err
|
||||
# Use cache file
|
||||
else
|
||||
raise
|
||||
end
|
||||
end
|
||||
|
||||
def download(response)
|
||||
|
@ -20,13 +20,12 @@ module Whisper
|
||||
def self.lang_id: (string name) -> Integer
|
||||
def self.lang_str: (Integer id) -> String
|
||||
def self.lang_str_full: (Integer id) -> String
|
||||
def self.log_set=: (log_callback) -> log_callback
|
||||
def self.finalize_log_callback: (void) -> void # Second argument of ObjectSpace.define_finalizer
|
||||
def self.log_set: (log_callback, Object? user_data) -> log_callback
|
||||
|
||||
class Context
|
||||
def initialize: (string | _ToPath | ::URI::HTTP ) -> void
|
||||
def transcribe: (string, Params) -> void
|
||||
| (string, Params) { (String) -> void } -> void
|
||||
def self.new: (string | _ToPath | ::URI::HTTP) -> instance
|
||||
def transcribe: (string, Params) -> self
|
||||
| (string, Params) { (String) -> void } -> self
|
||||
def model_n_vocab: () -> Integer
|
||||
def model_n_audio_ctx: () -> Integer
|
||||
def model_n_audio_state: () -> Integer
|
||||
@ -35,6 +34,10 @@ module Whisper
|
||||
def model_n_mels: () -> Integer
|
||||
def model_ftype: () -> Integer
|
||||
def model_type: () -> String
|
||||
def each_segment: { (Segment) -> void } -> void
|
||||
| () -> Enumerator[Segment]
|
||||
def model: () -> Model
|
||||
def full_get_segment: (Integer nth) -> Segment
|
||||
def full_n_segments: () -> Integer
|
||||
def full_lang_id: () -> Integer
|
||||
def full_get_segment_t0: (Integer) -> Integer
|
||||
@ -42,18 +45,46 @@ module Whisper
|
||||
def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
|
||||
def full_get_segment_text: (Integer) -> String
|
||||
def full_get_segment_no_speech_prob: (Integer) -> Float
|
||||
def full: (Params, Array[Float], ?Integer) -> void
|
||||
| (Params, _Samples, ?Integer) -> void
|
||||
def full_parallel: (Params, Array[Float], ?Integer) -> void
|
||||
| (Params, _Samples, ?Integer) -> void
|
||||
| (Params, _Samples, ?Integer?, Integer) -> void
|
||||
def each_segment: { (Segment) -> void } -> void
|
||||
| () -> Enumerator[Segment]
|
||||
def model: () -> Model
|
||||
def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
|
||||
| (Params, _Samples, ?Integer n_samples) -> self
|
||||
def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
|
||||
| (Params, _Samples, ?Integer n_samples) -> self
|
||||
| (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
|
||||
end
|
||||
|
||||
class Params
|
||||
def initialize: () -> void
|
||||
def self.new: (
|
||||
?language: string,
|
||||
?translate: boolish,
|
||||
?no_context: boolish,
|
||||
?single_segment: boolish,
|
||||
?print_special: boolish,
|
||||
?print_progress: boolish,
|
||||
?print_realtime: boolish,
|
||||
?print_timestamps: boolish,
|
||||
?suppress_blank: boolish,
|
||||
?suppress_nst: boolish,
|
||||
?token_timestamps: boolish,
|
||||
?split_on_word: boolish,
|
||||
?initial_prompt: string | nil,
|
||||
?diarize: boolish,
|
||||
?offset: Integer,
|
||||
?duration: Integer,
|
||||
?max_text_tokens: Integer,
|
||||
?temperature: Float,
|
||||
?max_initial_ts: Float,
|
||||
?length_penalty: Float,
|
||||
?temperature_inc: Float,
|
||||
?entropy_thold: Float,
|
||||
?logprob_thold: Float,
|
||||
?no_speech_thold: Float,
|
||||
?new_segment_callback: new_segment_callback,
|
||||
?new_segment_callback_user_data: Object,
|
||||
?progress_callback: progress_callback,
|
||||
?progress_callback_user_data: Object,
|
||||
?abort_callback: abort_callback,
|
||||
?abort_callback_user_data: Object
|
||||
) -> instance
|
||||
def language=: (String) -> String # TODO: Enumerate lang names
|
||||
def language: () -> String
|
||||
def translate=: (boolish) -> boolish
|
||||
@ -79,7 +110,7 @@ module Whisper
|
||||
def split_on_word=: (boolish) -> boolish
|
||||
def split_on_word: () -> (true | false)
|
||||
def initial_prompt=: (_ToS) -> _ToS
|
||||
def initial_prompt: () -> String
|
||||
def initial_prompt: () -> (String | nil)
|
||||
def diarize=: (boolish) -> boolish
|
||||
def diarize: () -> (true | false)
|
||||
def offset=: (Integer) -> Integer
|
||||
@ -103,19 +134,25 @@ module Whisper
|
||||
def no_speech_thold=: (Float) -> Float
|
||||
def no_speech_thold: () -> Float
|
||||
def new_segment_callback=: (new_segment_callback) -> new_segment_callback
|
||||
def new_segment_callback: () -> (new_segment_callback | nil)
|
||||
def new_segment_callback_user_data=: (Object) -> Object
|
||||
def new_segment_callback_user_data: () -> Object
|
||||
def progress_callback=: (progress_callback) -> progress_callback
|
||||
def progress_callback: () -> (progress_callback | nil)
|
||||
def progress_callback_user_data=: (Object) -> Object
|
||||
def progress_callback_user_data: () -> Object
|
||||
def abort_callback=: (abort_callback) -> abort_callback
|
||||
def abort_callback: () -> (abort_callback | nil)
|
||||
def abort_callback_user_data=: (Object) -> Object
|
||||
def abort_callback_user_data: () -> Object
|
||||
def on_new_segment: { (Segment) -> void } -> void
|
||||
def on_progress: { (Integer) -> void } -> void
|
||||
def abort_on: { (Object) -> boolish } -> void
|
||||
def on_progress: { (Integer progress) -> void } -> void
|
||||
def abort_on: { (Object user_data) -> boolish } -> void
|
||||
end
|
||||
|
||||
class Model
|
||||
def self.pre_converted_models: () -> Hash[String, Model::URI]
|
||||
def initialize: () -> void
|
||||
def self.new: () -> instance
|
||||
def n_vocab: () -> Integer
|
||||
def n_audio_ctx: () -> Integer
|
||||
def n_audio_state: () -> Integer
|
||||
@ -130,14 +167,13 @@ module Whisper
|
||||
def type: () -> String
|
||||
|
||||
class URI
|
||||
def initialize: (string | ::URI::HTTP) -> void
|
||||
def self.new: (string | ::URI::HTTP) -> self
|
||||
def to_path: -> String
|
||||
def clear_cache: -> void
|
||||
end
|
||||
end
|
||||
|
||||
class Segment
|
||||
def initialize: () -> void
|
||||
def start_time: () -> Integer
|
||||
def end_time: () -> Integer
|
||||
def speaker_next_turn?: () -> (true | false)
|
||||
@ -148,6 +184,6 @@ module Whisper
|
||||
class Error < StandardError
|
||||
attr_reader code: Integer
|
||||
|
||||
def initialize: (Integer) -> void
|
||||
def self.new: (Integer code) -> instance
|
||||
end
|
||||
end
|
||||
|
@ -25,7 +25,7 @@ class TestCallback < TestBase
|
||||
assert start_time >= 0
|
||||
assert_kind_of Integer, end_time
|
||||
assert end_time > 0
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, text if i_segment == 0
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text) if i_segment == 0
|
||||
end
|
||||
}
|
||||
|
||||
@ -145,9 +145,9 @@ class TestCallback < TestBase
|
||||
|
||||
def test_abort_on
|
||||
do_abort = false
|
||||
aborted_from_callback = false
|
||||
_aborted_from_callback = false
|
||||
@params.on_new_segment do |segment|
|
||||
do_abort = true if segment.text.match? /ask/
|
||||
do_abort = true if segment.text.match?(/ask/)
|
||||
end
|
||||
i = 0
|
||||
@params.abort_on do
|
||||
|
@ -4,7 +4,7 @@ class TestError < TestBase
|
||||
def test_error
|
||||
error = Whisper::Error.new(-2)
|
||||
assert_equal "failed to compute log mel spectrogram", error.message
|
||||
assert_equal -2, error.code
|
||||
assert_equal(-2, error.code)
|
||||
end
|
||||
|
||||
def test_unknown_error
|
||||
@ -14,7 +14,7 @@ class TestError < TestBase
|
||||
|
||||
def test_non_int_code
|
||||
assert_raise TypeError do
|
||||
error = Whisper::Error.new("non int")
|
||||
_error = Whisper::Error.new("non int")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -1,6 +1,39 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestParams < TestBase
|
||||
PARAM_NAMES = [
|
||||
:language,
|
||||
:translate,
|
||||
:no_context,
|
||||
:single_segment,
|
||||
:print_special,
|
||||
:print_progress,
|
||||
:print_realtime,
|
||||
:print_timestamps,
|
||||
:suppress_blank,
|
||||
:suppress_nst,
|
||||
:token_timestamps,
|
||||
:split_on_word,
|
||||
:initial_prompt,
|
||||
:diarize,
|
||||
:offset,
|
||||
:duration,
|
||||
:max_text_tokens,
|
||||
:temperature,
|
||||
:max_initial_ts,
|
||||
:length_penalty,
|
||||
:temperature_inc,
|
||||
:entropy_thold,
|
||||
:logprob_thold,
|
||||
:no_speech_thold,
|
||||
:new_segment_callback,
|
||||
:new_segment_callback_user_data,
|
||||
:progress_callback,
|
||||
:progress_callback_user_data,
|
||||
:abort_callback,
|
||||
:abort_callback_user_data,
|
||||
]
|
||||
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
@ -129,7 +162,7 @@ class TestParams < TestBase
|
||||
end
|
||||
|
||||
def test_length_penalty
|
||||
assert_equal -1.0, @params.length_penalty
|
||||
assert_equal(-1.0, @params.length_penalty)
|
||||
@params.length_penalty = 0.5
|
||||
assert_equal 0.5, @params.length_penalty
|
||||
end
|
||||
@ -147,9 +180,9 @@ class TestParams < TestBase
|
||||
end
|
||||
|
||||
def test_logprob_thold
|
||||
assert_in_delta -1.0, @params.logprob_thold
|
||||
assert_in_delta(-1.0, @params.logprob_thold)
|
||||
@params.logprob_thold = -0.5
|
||||
assert_in_delta -0.5, @params.logprob_thold
|
||||
assert_in_delta(-0.5, @params.logprob_thold)
|
||||
end
|
||||
|
||||
def test_no_speech_thold
|
||||
@ -157,4 +190,57 @@ class TestParams < TestBase
|
||||
@params.no_speech_thold = 0.2
|
||||
assert_in_delta 0.2, @params.no_speech_thold
|
||||
end
|
||||
|
||||
def test_new_with_kw_args
|
||||
params = Whisper::Params.new(language: "es")
|
||||
assert_equal "es", params.language
|
||||
assert_equal 1.0, params.max_initial_ts
|
||||
end
|
||||
|
||||
def test_new_with_kw_args_non_existent
|
||||
assert_raise ArgumentError do
|
||||
Whisper::Params.new(non_existent: "value")
|
||||
end
|
||||
end
|
||||
|
||||
def test_new_with_kw_args_wrong_type
|
||||
assert_raise TypeError do
|
||||
Whisper::Params.new(language: 3)
|
||||
end
|
||||
end
|
||||
|
||||
data(PARAM_NAMES.collect {|param| [param, param]}.to_h)
|
||||
def test_new_with_kw_args_default_values(param)
|
||||
default_value = @params.send(param)
|
||||
value = case [param, default_value]
|
||||
in [*, true | false]
|
||||
!default_value
|
||||
in [*, Integer | Float]
|
||||
default_value + 1
|
||||
in [:language, *]
|
||||
"es"
|
||||
in [:initial_prompt, *]
|
||||
"Initial prompt"
|
||||
in [/_callback\Z/, *]
|
||||
proc {}
|
||||
in [/_user_data\Z/, *]
|
||||
Object.new
|
||||
end
|
||||
params = Whisper::Params.new(param => value)
|
||||
if Float === value
|
||||
assert_in_delta value, params.send(param)
|
||||
else
|
||||
assert_equal value, params.send(param)
|
||||
end
|
||||
|
||||
PARAM_NAMES.reject {|name| name == param}.each do |name|
|
||||
expected = @params.send(name)
|
||||
actual = params.send(name)
|
||||
if Float === expected
|
||||
assert_in_delta expected, actual
|
||||
else
|
||||
assert_equal expected, actual
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -49,13 +49,13 @@ class TestSegment < TestBase
|
||||
if index == 0
|
||||
seg = segment
|
||||
assert_equal 0, segment.start_time
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
|
||||
end
|
||||
index += 1
|
||||
end
|
||||
whisper.transcribe(AUDIO, params)
|
||||
assert_equal 0, seg.start_time
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, seg.text)
|
||||
end
|
||||
|
||||
def test_on_new_segment_twice
|
||||
|
@ -16,7 +16,7 @@ class TestWhisper < TestBase
|
||||
params.print_timestamps = false
|
||||
|
||||
@whisper.transcribe(AUDIO, params) {|text|
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
|
||||
}
|
||||
end
|
||||
|
||||
@ -29,6 +29,12 @@ class TestWhisper < TestBase
|
||||
assert_equal 0, whisper.full_lang_id
|
||||
end
|
||||
|
||||
def test_full_get_segment
|
||||
segment = whisper.full_get_segment(0)
|
||||
assert_equal 0, segment.start_time
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
|
||||
end
|
||||
|
||||
def test_full_get_segment_t0
|
||||
assert_equal 0, whisper.full_get_segment_t0(0)
|
||||
assert_raise IndexError do
|
||||
@ -53,7 +59,7 @@ class TestWhisper < TestBase
|
||||
end
|
||||
|
||||
def test_full_get_segment_text
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0))
|
||||
end
|
||||
|
||||
def test_full_get_segment_no_speech_prob
|
||||
@ -128,14 +134,14 @@ class TestWhisper < TestBase
|
||||
@whisper.full(@params, @samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_without_length
|
||||
@whisper.full(@params, @samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_enumerator
|
||||
@ -143,7 +149,7 @@ class TestWhisper < TestBase
|
||||
@whisper.full(@params, samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_enumerator_without_length
|
||||
@ -165,26 +171,28 @@ class TestWhisper < TestBase
|
||||
@whisper.full(@params, samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_parallel
|
||||
@whisper.full_parallel(@params, @samples, @samples.length, Etc.nprocessors)
|
||||
nprocessors = 2
|
||||
@whisper.full_parallel(@params, @samples, @samples.length, nprocessors)
|
||||
|
||||
assert_equal Etc.nprocessors, @whisper.full_n_segments
|
||||
assert_equal nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_with_memory_view
|
||||
nprocessors = 2
|
||||
samples = JFKReader.new(AUDIO)
|
||||
@whisper.full_parallel(@params, samples, nil, Etc.nprocessors)
|
||||
@whisper.full_parallel(@params, samples, nil, nprocessors)
|
||||
|
||||
assert_equal Etc.nprocessors, @whisper.full_n_segments
|
||||
assert_equal nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_without_length_and_n_processors
|
||||
@ -192,17 +200,18 @@ class TestWhisper < TestBase
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_without_length
|
||||
@whisper.full_parallel(@params, @samples, nil, Etc.nprocessors)
|
||||
nprocessors = 2
|
||||
@whisper.full_parallel(@params, @samples, nil, nprocessors)
|
||||
|
||||
assert_equal Etc.nprocessors, @whisper.full_n_segments
|
||||
assert_equal nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_without_n_processors
|
||||
@ -210,8 +219,8 @@ class TestWhisper < TestBase
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
535
build-xcframework.sh
Executable file
535
build-xcframework.sh
Executable file
@ -0,0 +1,535 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Options
|
||||
IOS_MIN_OS_VERSION=16.4
|
||||
MACOS_MIN_OS_VERSION=13.3
|
||||
VISIONOS_MIN_OS_VERSION=1.0
|
||||
TVOS_MIN_OS_VERSION=16.4
|
||||
|
||||
BUILD_SHARED_LIBS=OFF
|
||||
WHISPER_BUILD_EXAMPLES=OFF
|
||||
WHISPER_BUILD_TESTS=OFF
|
||||
WHISPER_BUILD_SERVER=OFF
|
||||
GGML_METAL=ON
|
||||
GGML_METAL_EMBED_LIBRARY=ON
|
||||
GGML_BLAS_DEFAULT=ON
|
||||
GGML_METAL_USE_BF16=ON
|
||||
GGML_OPENMP=OFF
|
||||
|
||||
COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
|
||||
COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
|
||||
|
||||
# Common options for all builds
|
||||
COMMON_CMAKE_ARGS=(
|
||||
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO
|
||||
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY=""
|
||||
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO
|
||||
-DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym"
|
||||
-DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES
|
||||
-DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO
|
||||
-DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO
|
||||
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||
-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
|
||||
-DWHISPER_BUILD_EXAMPLES=${WHISPER_BUILD_EXAMPLES}
|
||||
-DWHISPER_BUILD_TESTS=${WHISPER_BUILD_TESTS}
|
||||
-DWHISPER_BUILD_SERVER=${WHISPER_BUILD_SERVER}
|
||||
-DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY}
|
||||
-DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT}
|
||||
-DGGML_METAL=${GGML_METAL}
|
||||
-DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16}
|
||||
-DGGML_NATIVE=OFF
|
||||
-DGGML_OPENMP=${GGML_OPENMP}
|
||||
)
|
||||
|
||||
check_required_tool() {
|
||||
local tool=$1
|
||||
local install_message=$2
|
||||
|
||||
if ! command -v $tool &> /dev/null; then
|
||||
echo "Error: $tool is required but not found."
|
||||
echo "$install_message"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
echo "Checking for required tools..."
|
||||
check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
|
||||
check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
|
||||
check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
|
||||
check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
|
||||
|
||||
set -e
|
||||
|
||||
## Clean up previous builds
|
||||
rm -rf build-apple
|
||||
rm -rf build-ios-sim
|
||||
rm -rf build-ios-device
|
||||
rm -rf build-macos
|
||||
rm -rf build-visionos
|
||||
rm -rf build-visionos-sim
|
||||
rm -rf build-tvos-sim
|
||||
rm -rf build-tvos-device
|
||||
|
||||
# Setup the xcframework build directory structure
|
||||
setup_framework_structure() {
|
||||
local build_dir=$1
|
||||
local min_os_version=$2
|
||||
local platform=$3 # "ios", "macos", "visionos", or "tvos"
|
||||
local framework_name="whisper"
|
||||
|
||||
echo "Creating ${platform}-style framework structure for ${build_dir}"
|
||||
|
||||
if [[ "$platform" == "macos" ]]; then
|
||||
# macOS versioned structure uses versioned directories
|
||||
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers
|
||||
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules
|
||||
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources
|
||||
|
||||
# Create symbolic links
|
||||
ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current
|
||||
ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers
|
||||
ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules
|
||||
ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources
|
||||
ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name}
|
||||
|
||||
# Set header and module paths
|
||||
local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/
|
||||
local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/
|
||||
else
|
||||
# iOS/VisionOS/tvOS use a flat structure
|
||||
mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers
|
||||
mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules
|
||||
|
||||
# Remove any existing structure to ensure clean build
|
||||
rm -rf ${build_dir}/framework/${framework_name}.framework/Versions
|
||||
|
||||
# Set header and module paths
|
||||
local header_path=${build_dir}/framework/${framework_name}.framework/Headers/
|
||||
local module_path=${build_dir}/framework/${framework_name}.framework/Modules/
|
||||
fi
|
||||
|
||||
# Copy all required headers (common for all platforms)
|
||||
cp include/whisper.h ${header_path}
|
||||
cp ggml/include/ggml.h ${header_path}
|
||||
cp ggml/include/ggml-alloc.h ${header_path}
|
||||
cp ggml/include/ggml-backend.h ${header_path}
|
||||
cp ggml/include/ggml-metal.h ${header_path}
|
||||
cp ggml/include/ggml-cpu.h ${header_path}
|
||||
cp ggml/include/ggml-blas.h ${header_path}
|
||||
cp ggml/include/gguf.h ${header_path}
|
||||
|
||||
# Create module map (common for all platforms)
|
||||
cat > ${module_path}module.modulemap << EOF
|
||||
framework module whisper {
|
||||
header "whisper.h"
|
||||
header "ggml.h"
|
||||
header "ggml-alloc.h"
|
||||
header "ggml-backend.h"
|
||||
header "ggml-metal.h"
|
||||
header "ggml-cpu.h"
|
||||
header "ggml-blas.h"
|
||||
header "gguf.h"
|
||||
|
||||
link "c++"
|
||||
link framework "Accelerate"
|
||||
link framework "Metal"
|
||||
link framework "Foundation"
|
||||
|
||||
export *
|
||||
}
|
||||
EOF
|
||||
|
||||
# Platform-specific settings for Info.plist
|
||||
local platform_name=""
|
||||
local sdk_name=""
|
||||
local supported_platform=""
|
||||
|
||||
case "$platform" in
|
||||
"ios")
|
||||
platform_name="iphoneos"
|
||||
sdk_name="iphoneos${min_os_version}"
|
||||
supported_platform="iPhoneOS"
|
||||
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
|
||||
local device_family=' <key>UIDeviceFamily</key>
|
||||
<array>
|
||||
<integer>1</integer>
|
||||
<integer>2</integer>
|
||||
</array>'
|
||||
;;
|
||||
"macos")
|
||||
platform_name="macosx"
|
||||
sdk_name="macosx${min_os_version}"
|
||||
supported_platform="MacOSX"
|
||||
local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist"
|
||||
local device_family=""
|
||||
;;
|
||||
"visionos")
|
||||
platform_name="xros"
|
||||
sdk_name="xros${min_os_version}"
|
||||
supported_platform="XRPlatform"
|
||||
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
|
||||
local device_family=""
|
||||
;;
|
||||
"tvos")
|
||||
platform_name="appletvos"
|
||||
sdk_name="appletvos${min_os_version}"
|
||||
supported_platform="AppleTVOS"
|
||||
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
|
||||
local device_family=' <key>UIDeviceFamily</key>
|
||||
<array>
|
||||
<integer>3</integer>
|
||||
</array>'
|
||||
;;
|
||||
esac
|
||||
|
||||
# Create Info.plist
|
||||
cat > ${plist_path} << EOF
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>en</string>
|
||||
<key>CFBundleExecutable</key>
|
||||
<string>whisper</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>org.ggml.whisper</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleName</key>
|
||||
<string>whisper</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>FMWK</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.0</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>1</string>
|
||||
<key>MinimumOSVersion</key>
|
||||
<string>${min_os_version}</string>
|
||||
<key>CFBundleSupportedPlatforms</key>
|
||||
<array>
|
||||
<string>${supported_platform}</string>
|
||||
</array>${device_family}
|
||||
<key>DTPlatformName</key>
|
||||
<string>${platform_name}</string>
|
||||
<key>DTSDKName</key>
|
||||
<string>${sdk_name}</string>
|
||||
</dict>
|
||||
</plist>
|
||||
EOF
|
||||
}
|
||||
|
||||
# Create dynamic libraries from static libraries.
|
||||
combine_static_libraries() {
|
||||
local build_dir="$1"
|
||||
local release_dir="$2"
|
||||
local platform="$3" # "ios", "macos", "visionos", or "tvos"
|
||||
local is_simulator="$4"
|
||||
local base_dir="$(pwd)"
|
||||
local framework_name="whisper"
|
||||
|
||||
# Determine output path based on platform
|
||||
local output_lib=""
|
||||
if [[ "$platform" == "macos" ]]; then
|
||||
# macOS uses versioned structure
|
||||
output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}"
|
||||
else
|
||||
# iOS, visionOS, and tvOS use a directory flat structure
|
||||
output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}"
|
||||
fi
|
||||
|
||||
local libs=(
|
||||
"${base_dir}/${build_dir}/src/${release_dir}/libwhisper.a"
|
||||
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
|
||||
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
|
||||
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
|
||||
"${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
|
||||
"${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
|
||||
)
|
||||
if [[ "$platform" == "macos" || "$platform" == "ios" ]]; then
|
||||
echo "Adding libwhisper.coreml library to the build."
|
||||
libs+=(
|
||||
"${base_dir}/${build_dir}/src/${release_dir}/libwhisper.coreml.a"
|
||||
)
|
||||
fi
|
||||
|
||||
# Create temporary directory for processing
|
||||
local temp_dir="${base_dir}/${build_dir}/temp"
|
||||
echo "Creating temporary directory: ${temp_dir}"
|
||||
mkdir -p "${temp_dir}"
|
||||
|
||||
# Since we have multiple architectures libtool will find object files that do not
|
||||
# match the target architecture. We suppress these warnings.
|
||||
libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null
|
||||
|
||||
# Determine SDK, architectures, and install_name based on platform and simulator flag.
|
||||
local sdk=""
|
||||
local archs=""
|
||||
local min_version_flag=""
|
||||
local install_name=""
|
||||
local frameworks="-framework Foundation -framework Metal -framework Accelerate"
|
||||
|
||||
case "$platform" in
|
||||
"ios")
|
||||
if [[ "$is_simulator" == "true" ]]; then
|
||||
sdk="iphonesimulator"
|
||||
archs="arm64 x86_64"
|
||||
min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}"
|
||||
else
|
||||
sdk="iphoneos"
|
||||
archs="arm64"
|
||||
min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}"
|
||||
fi
|
||||
install_name="@rpath/whisper.framework/whisper"
|
||||
frameworks+=" -framework CoreML"
|
||||
;;
|
||||
"macos")
|
||||
sdk="macosx"
|
||||
archs="arm64 x86_64"
|
||||
min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}"
|
||||
install_name="@rpath/whisper.framework/Versions/Current/whisper"
|
||||
frameworks+=" -framework CoreML"
|
||||
;;
|
||||
"visionos")
|
||||
if [[ "$is_simulator" == "true" ]]; then
|
||||
sdk="xrsimulator"
|
||||
archs="arm64 x86_64"
|
||||
min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator"
|
||||
else
|
||||
sdk="xros"
|
||||
archs="arm64"
|
||||
min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}"
|
||||
fi
|
||||
# Use flat structure for visionOS, same as iOS
|
||||
install_name="@rpath/whisper.framework/whisper"
|
||||
;;
|
||||
"tvos")
|
||||
if [[ "$is_simulator" == "true" ]]; then
|
||||
sdk="appletvsimulator"
|
||||
archs="arm64 x86_64"
|
||||
min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}"
|
||||
else
|
||||
sdk="appletvos"
|
||||
archs="arm64"
|
||||
min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}"
|
||||
fi
|
||||
install_name="@rpath/whisper.framework/whisper"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Build architecture flags
|
||||
local arch_flags=""
|
||||
for arch in $archs; do
|
||||
arch_flags+=" -arch $arch"
|
||||
done
|
||||
|
||||
# Create dynamic library
|
||||
echo "Creating dynamic library for ${platform}."
|
||||
xcrun -sdk $sdk clang++ -dynamiclib \
|
||||
-isysroot $(xcrun --sdk $sdk --show-sdk-path) \
|
||||
$arch_flags \
|
||||
$min_version_flag \
|
||||
-Wl,-force_load,"${temp_dir}/combined.a" \
|
||||
$frameworks \
|
||||
-install_name "$install_name" \
|
||||
-o "${base_dir}/${output_lib}"
|
||||
|
||||
# Platform-specific post-processing for device builds
|
||||
if [[ "$is_simulator" == "false" ]]; then
|
||||
if command -v vtool &>/dev/null; then
|
||||
case "$platform" in
|
||||
"ios")
|
||||
echo "Marking binary as a framework binary for iOS..."
|
||||
vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
|
||||
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
|
||||
;;
|
||||
"visionos")
|
||||
echo "Marking binary as a framework binary for visionOS..."
|
||||
vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
|
||||
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
|
||||
;;
|
||||
"tvos")
|
||||
echo "Marking binary as a framework binary for tvOS..."
|
||||
vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
|
||||
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
|
||||
;;
|
||||
esac
|
||||
else
|
||||
echo "Warning: vtool not found. Binary may not pass App Store validation."
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Creating properly formatted dSYM..."
|
||||
# Create a separate directory for dSYMs for all platforms
|
||||
mkdir -p "${base_dir}/${build_dir}/dSYMs"
|
||||
|
||||
# iOS and visionOS style dSYM (flat structure)
|
||||
if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
|
||||
# Generate dSYM in the dSYMs directory
|
||||
xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
|
||||
|
||||
# Create a copy of the binary that will be stripped
|
||||
cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"
|
||||
|
||||
# Strip debug symbols from the copy
|
||||
xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"
|
||||
|
||||
# Replace the original with the stripped version
|
||||
mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
|
||||
else
|
||||
# macOS style dSYM
|
||||
# First strip debug info to a separate file
|
||||
xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"
|
||||
|
||||
# Generate dSYM in the dSYMs directory
|
||||
xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
|
||||
|
||||
# Replace original binary with stripped version
|
||||
mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
|
||||
fi
|
||||
|
||||
# Remove any automatically generated dSYM files in the framework structure as they will
|
||||
# otherwise case Invalid Bundle Structure validation errors.
|
||||
if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
|
||||
echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
|
||||
rm -rf "${base_dir}/${output_lib}.dSYM"
|
||||
fi
|
||||
|
||||
# Clean up
|
||||
rm -rf "${temp_dir}"
|
||||
}
|
||||
|
||||
echo "Building for iOS simulator..."
|
||||
cmake -B build-ios-sim -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
|
||||
-DIOS=ON \
|
||||
-DCMAKE_SYSTEM_NAME=iOS \
|
||||
-DCMAKE_OSX_SYSROOT=iphonesimulator \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-DWHISPER_COREML="ON" \
|
||||
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
|
||||
-S .
|
||||
cmake --build build-ios-sim --config Release -- -quiet
|
||||
|
||||
echo "Building for iOS devices..."
|
||||
cmake -B build-ios-device -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_SYSROOT=iphoneos \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64" \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-DWHISPER_COREML="ON" \
|
||||
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
|
||||
-S .
|
||||
cmake --build build-ios-device --config Release -- -quiet
|
||||
|
||||
echo "Building for macOS..."
|
||||
cmake -B build-macos -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-DWHISPER_COREML="ON" \
|
||||
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
|
||||
-S .
|
||||
cmake --build build-macos --config Release -- -quiet
|
||||
|
||||
echo "Building for visionOS..."
|
||||
cmake -B build-visionos -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64" \
|
||||
-DCMAKE_SYSTEM_NAME=visionOS \
|
||||
-DCMAKE_OSX_SYSROOT=xros \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
|
||||
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-visionos --config Release -- -quiet
|
||||
|
||||
echo "Building for visionOS simulator..."
|
||||
cmake -B build-visionos-sim -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DCMAKE_SYSTEM_NAME=visionOS \
|
||||
-DCMAKE_OSX_SYSROOT=xrsimulator \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
|
||||
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-visionos-sim --config Release -- -quiet
|
||||
|
||||
# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS)
|
||||
echo "Building for tvOS simulator..."
|
||||
cmake -B build-tvos-sim -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_SYSTEM_NAME=tvOS \
|
||||
-DCMAKE_OSX_SYSROOT=appletvsimulator \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DGGML_METAL=ON \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-tvos-sim --config Release -- -quiet
|
||||
|
||||
echo "Building for tvOS devices..."
|
||||
cmake -B build-tvos-device -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_SYSTEM_NAME=tvOS \
|
||||
-DCMAKE_OSX_SYSROOT=appletvos \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64" \
|
||||
-DGGML_METAL=ON \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-tvos-device --config Release -- -quiet
|
||||
|
||||
# Setup frameworks and copy binaries and headers
|
||||
echo "Setting up framework structures..."
|
||||
setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios"
|
||||
setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios"
|
||||
setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos"
|
||||
setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos"
|
||||
setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos"
|
||||
setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos"
|
||||
setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos"
|
||||
|
||||
# Create dynamic libraries from static libraries
|
||||
echo "Creating dynamic libraries from static libraries..."
|
||||
combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true"
|
||||
combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false"
|
||||
combine_static_libraries "build-macos" "Release" "macos" "false"
|
||||
combine_static_libraries "build-visionos" "Release-xros" "visionos" "false"
|
||||
combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true"
|
||||
combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true"
|
||||
combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
|
||||
|
||||
# Create XCFramework with correct debug symbols paths
|
||||
echo "Creating XCFramework..."
|
||||
xcodebuild -create-xcframework \
|
||||
-framework $(pwd)/build-ios-sim/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-ios-sim/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-ios-device/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-ios-device/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-macos/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-macos/dSYMS/whisper.dSYM \
|
||||
-framework $(pwd)/build-visionos/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-visionos/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-visionos-sim/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-visionos-sim/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-tvos-device/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-tvos-device/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-tvos-sim/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-tvos-sim/dSYMs/whisper.dSYM \
|
||||
-output $(pwd)/build-apple/whisper.xcframework
|
41
ci/README.md
Normal file
41
ci/README.md
Normal file
@ -0,0 +1,41 @@
|
||||
# CI
|
||||
|
||||
In addition to [Github Actions](https://github.com/ggerganov/whisper.cpp/actions) `whisper.cpp` uses a custom CI framework:
|
||||
|
||||
https://github.com/ggml-org/ci
|
||||
|
||||
It monitors the `master` branch for new commits and runs the
|
||||
[ci/run.sh](https://github.com/ggerganov/whisper.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
|
||||
to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
|
||||
to cover various hardware architectures, including GPU and Apple Silicon instances.
|
||||
|
||||
Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
|
||||
Only the branches of this repo are monitored for this keyword.
|
||||
|
||||
It is a good practice, before publishing changes to execute the full CI locally on your machine:
|
||||
|
||||
```bash
|
||||
mkdir tmp
|
||||
|
||||
# CPU-only build
|
||||
bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
|
||||
# with CUDA support
|
||||
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
The CI script supports several environment variables to control the build:
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `GG_BUILD_CUDA` | Enable NVIDIA CUDA GPU acceleration |
|
||||
| `GG_BUILD_SYCL` | Enable Intel SYCL acceleration |
|
||||
| `GG_BUILD_VULKAN` | Enable Vulkan GPU acceleration |
|
||||
| `GG_BUILD_METAL` | Enable Metal acceleration on Apple Silicon |
|
||||
| `GG_BUILD_BLAS` | Enable BLAS CPU acceleration |
|
||||
| `GG_BUILD_OPENVINO` | Enable OpenVINO support |
|
||||
| `GG_BUILD_COREML` | Enable Core ML support for Apple Neural Engine |
|
||||
| `GG_BUILD_LOW_PERF` | Limit tests for low-performance hardware |
|
||||
| `GG_BUILD_TEST_MODELS` | Comma-separated list of models to test (e.g. "tiny.en,tiny,base,medium", defaults to all models unless `GG_BUILD_LOW_PERF` is set) |
|
336
ci/run.sh
Normal file
336
ci/run.sh
Normal file
@ -0,0 +1,336 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# sample usage:
|
||||
#
|
||||
# mkdir tmp
|
||||
#
|
||||
# # CPU-only build
|
||||
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
#
|
||||
# # with CUDA support
|
||||
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
#
|
||||
# # with SYCL support
|
||||
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||
|
||||
if [ -z "$2" ]; then
|
||||
echo "usage: $0 <output-dir> <mnt-dir>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$1"
|
||||
mkdir -p "$2"
|
||||
|
||||
OUT=$(realpath "$1")
|
||||
MNT=$(realpath "$2")
|
||||
|
||||
rm -f "$OUT/*.log"
|
||||
rm -f "$OUT/*.exit"
|
||||
rm -f "$OUT/*.md"
|
||||
|
||||
sd=`dirname $0`
|
||||
cd $sd/../
|
||||
SRC=`pwd`
|
||||
|
||||
ALL_MODELS=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
|
||||
BENCH_N_THREADS=4
|
||||
BENCH_ENCODER_ONLY=0
|
||||
BENCH_FLASH_ATTN=0
|
||||
|
||||
# check for user-specified models first. if not specified, use fast models
|
||||
if [ ! -z ${GG_BUILD_TEST_MODELS} ]; then
|
||||
IFS=',' read -r -a MODELS <<< "${GG_BUILD_TEST_MODELS}"
|
||||
else
|
||||
if [ ! -z ${GG_BUILD_LOW_PERF} ]; then
|
||||
MODELS=( "tiny" "base" "small" )
|
||||
else
|
||||
MODELS=("${ALL_MODELS[@]}")
|
||||
fi
|
||||
fi
|
||||
|
||||
CMAKE_EXTRA="-DWHISPER_FATAL_WARNINGS=ON"
|
||||
|
||||
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
||||
if [ -z ${ONEAPI_ROOT} ]; then
|
||||
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
|
||||
echo "source /opt/intel/oneapi/setvars.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_OPENVINO} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_OPENVINO=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_METAL} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_VULKAN} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_BLAS} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_BLAS=ON"
|
||||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_COREML} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_COREML=ON"
|
||||
fi
|
||||
|
||||
## helpers
|
||||
|
||||
# download a file if it does not exist or if it is outdated
|
||||
function gg_wget {
|
||||
local out=$1
|
||||
local url=$2
|
||||
|
||||
local cwd=`pwd`
|
||||
|
||||
mkdir -p $out
|
||||
cd $out
|
||||
|
||||
# should not re-download if file is the same
|
||||
wget -nv -N $url
|
||||
|
||||
cd $cwd
|
||||
}
|
||||
|
||||
function gg_download_model {
|
||||
local model_name=$1
|
||||
local model_file="$MNT/models/ggml-${model_name}.bin"
|
||||
|
||||
if [ ! -f ${model_file} ]; then
|
||||
local cwd=`pwd`
|
||||
mkdir -p "$MNT/models"
|
||||
cd "$MNT/models"
|
||||
bash "$cwd/models/download-ggml-model.sh" ${model_name} .
|
||||
cd "$cwd"
|
||||
fi
|
||||
}
|
||||
|
||||
function gg_printf {
|
||||
printf -- "$@" >> $OUT/README.md
|
||||
}
|
||||
|
||||
# Helper function to check command exit status
|
||||
function gg_check_last_command_status {
|
||||
local exit_file=$1
|
||||
local command_name=$2
|
||||
|
||||
local exit_status=$?
|
||||
echo "$exit_status" > "$exit_file"
|
||||
|
||||
if [ $exit_status -ne 0 ]; then
|
||||
echo "Error: Command $command_name failed with exit status $exit_status"
|
||||
return 1
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# Usage: gg_run <test_name> [additional_args...]
|
||||
#
|
||||
# Parameters:
|
||||
# test_name - Name of the test to run (calls gg_run_<test_name>)
|
||||
# additional_args - Any additional arguments to pass to the test function (first argument is appended to the log filename)
|
||||
function gg_run {
|
||||
ci=$1
|
||||
|
||||
if [ $# -gt 1 ]; then
|
||||
ci="${ci}_${2}"
|
||||
fi
|
||||
|
||||
set -o pipefail
|
||||
set -x
|
||||
|
||||
gg_run_$1 "$@" | tee $OUT/$ci.log
|
||||
cur=$?
|
||||
echo "$cur" > $OUT/$ci.exit
|
||||
|
||||
set +x
|
||||
set +o pipefail
|
||||
|
||||
gg_sum_$1 "$@"
|
||||
|
||||
ret=$((ret | cur))
|
||||
}
|
||||
|
||||
function gg_check_build_requirements {
|
||||
if ! command -v cmake &> /dev/null; then
|
||||
gg_printf 'cmake not found, please install'
|
||||
fi
|
||||
|
||||
if ! command -v make &> /dev/null; then
|
||||
gg_printf 'make not found, please install'
|
||||
fi
|
||||
}
|
||||
|
||||
## ci
|
||||
|
||||
function gg_run_ctest {
|
||||
mode=$2
|
||||
|
||||
cd ${SRC}
|
||||
|
||||
rm -rf build-ci-${mode} && mkdir build-ci-${mode} && cd build-ci-${mode}
|
||||
|
||||
set -e
|
||||
|
||||
gg_check_build_requirements
|
||||
|
||||
(time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||
|
||||
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
||||
|
||||
set +e
|
||||
}
|
||||
|
||||
function gg_sum_ctest {
|
||||
mode=$2
|
||||
|
||||
gg_printf '### %s\n\n' "${ci}"
|
||||
|
||||
gg_printf 'Runs ctest in '${mode}' mode\n'
|
||||
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||
gg_printf '```\n'
|
||||
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
|
||||
gg_printf '```\n'
|
||||
}
|
||||
|
||||
function gg_run_bench {
|
||||
cd ${SRC}
|
||||
|
||||
# set flash attention flag if enabled
|
||||
fattn=""
|
||||
if [ "$BENCH_FLASH_ATTN" -eq 1 ]; then
|
||||
fattn="-fa"
|
||||
fi
|
||||
|
||||
# run memcpy benchmark if not encoder-only mode
|
||||
if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
|
||||
echo "Running memcpy benchmark"
|
||||
(time ./build-ci-release/bin/whisper-bench -w 1 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-memcpy.log
|
||||
gg_check_last_command_status "$OUT/${ci}-memcpy.exit" "memcpy benchmark"
|
||||
|
||||
echo "Running ggml_mul_mat benchmark with $BENCH_N_THREADS threads"
|
||||
(time ./build-ci-release/bin/whisper-bench -w 2 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-mul_mat.log
|
||||
gg_check_last_command_status "$OUT/${ci}-mul_mat.exit" "ggml_mul_mat benchmark"
|
||||
fi
|
||||
|
||||
echo "Running benchmark for all models"
|
||||
|
||||
# generate header for the benchmark table
|
||||
{
|
||||
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "Config" "Model" "Th" "FA" "Enc." "Dec." "Bch5" "PP" "Commit"
|
||||
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---"
|
||||
} | tee -a $OUT/${ci}-models-table.log
|
||||
|
||||
# run benchmark for each model
|
||||
for model in "${MODELS[@]}"; do
|
||||
echo "Benchmarking model: $model"
|
||||
|
||||
# run the benchmark and capture output
|
||||
output=$(./build-ci-release/bin/whisper-bench -m $MNT/models/ggml-$model.bin -t $BENCH_N_THREADS $fattn 2>&1)
|
||||
ret=$?
|
||||
|
||||
# save the raw output
|
||||
echo "$output" > $OUT/${ci}-bench-$model.log
|
||||
|
||||
if [ $ret -eq 0 ]; then
|
||||
# parse the benchmark results
|
||||
encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
|
||||
decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
|
||||
batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
|
||||
prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
|
||||
system_info=$(echo "$output" | grep "system_info")
|
||||
actual_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
|
||||
|
||||
# determine configuration
|
||||
config=""
|
||||
if [[ $system_info == *"AVX2 = 1"* ]]; then
|
||||
config="$config AVX2"
|
||||
fi
|
||||
if [[ $system_info == *"NEON = 1"* ]]; then
|
||||
config="$config NEON"
|
||||
fi
|
||||
if [[ $system_info == *"BLAS = 1"* ]]; then
|
||||
config="$config BLAS"
|
||||
fi
|
||||
if [[ $system_info == *"COREML = 1"* ]]; then
|
||||
config="$config COREML"
|
||||
fi
|
||||
if [[ $system_info == *"CUDA = 1"* ]]; then
|
||||
config="$config CUDA"
|
||||
fi
|
||||
if [[ $system_info == *"METAL = 1"* ]]; then
|
||||
config="$config METAL"
|
||||
fi
|
||||
|
||||
# get commit hash
|
||||
commit=$(git rev-parse --short HEAD)
|
||||
|
||||
# add row to benchmark table
|
||||
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" \
|
||||
"$config" "$model" "$actual_threads" "$BENCH_FLASH_ATTN" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit" \
|
||||
| tee -a $OUT/${ci}-models-table.log
|
||||
else
|
||||
echo "Benchmark failed for model: $model" | tee -a $OUT/${ci}-bench-errors.log
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
function gg_sum_bench {
|
||||
gg_printf '### %s\n\n' "${ci}"
|
||||
|
||||
gg_printf 'Whisper Benchmark Results\n'
|
||||
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||
|
||||
# show memcpy and ggml_mul_mat benchmark results if available
|
||||
if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
|
||||
if [ -f "$OUT/${ci}-memcpy.log" ]; then
|
||||
gg_printf '#### memcpy Benchmark\n\n'
|
||||
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-memcpy.log)"
|
||||
fi
|
||||
|
||||
if [ -f "$OUT/${ci}-mul_mat.log" ]; then
|
||||
gg_printf '#### ggml_mul_mat Benchmark\n\n'
|
||||
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-mul_mat.log)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# show model benchmark results
|
||||
gg_printf '#### Model Benchmarks\n\n'
|
||||
if [ -f "$OUT/${ci}-models-table.log" ]; then
|
||||
gg_printf '%s\n\n' "$(cat $OUT/${ci}-models-table.log)"
|
||||
else
|
||||
gg_printf 'No model benchmark results available.\n\n'
|
||||
fi
|
||||
|
||||
# show any errors that occurred
|
||||
if [ -f "$OUT/${ci}-bench-errors.log" ]; then
|
||||
gg_printf '#### Benchmark Errors\n\n'
|
||||
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-bench-errors.log)"
|
||||
fi
|
||||
}
|
||||
|
||||
ret=0
|
||||
|
||||
for model in "${MODELS[@]}"; do
|
||||
test $ret -eq 0 && gg_download_model ${model}
|
||||
done
|
||||
if [ -z ${GG_BUILD_SYCL}]; then
|
||||
test $ret -eq 0 && gg_run ctest debug
|
||||
fi
|
||||
test $ret -eq 0 && gg_run ctest release
|
||||
|
||||
test $ret -eq 0 && gg_run bench
|
||||
|
||||
exit $ret
|
28
close-issue.yml
Normal file
28
close-issue.yml
Normal file
@ -0,0 +1,28 @@
|
||||
name: Close inactive issues
|
||||
on:
|
||||
schedule:
|
||||
- cron: "42 0 * * *"
|
||||
|
||||
# Fine-grant permission
|
||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v5
|
||||
with:
|
||||
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 14
|
||||
stale-issue-label: "stale"
|
||||
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
operations-per-run: 10000
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
@ -42,6 +42,8 @@ endif()
|
||||
if(MSVC)
|
||||
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
|
||||
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
|
||||
else()
|
||||
execute_process(
|
||||
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
|
||||
|
@ -14,10 +14,6 @@ if (WHISPER_SDL2)
|
||||
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
|
||||
endif()
|
||||
|
||||
if (WHISPER_CLBLAST)
|
||||
find_package(CLBlast REQUIRED)
|
||||
endif()
|
||||
|
||||
# common
|
||||
|
||||
set(TARGET common)
|
||||
@ -56,6 +52,8 @@ add_library(${TARGET} STATIC
|
||||
common.cpp
|
||||
common-ggml.h
|
||||
common-ggml.cpp
|
||||
common-whisper.h
|
||||
common-whisper.cpp
|
||||
grammar-parser.h
|
||||
grammar-parser.cpp
|
||||
${COMMON_SOURCES_FFMPEG}
|
||||
@ -63,7 +61,7 @@ add_library(${TARGET} STATIC
|
||||
|
||||
include(DefaultTargetOptions)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS})
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS} ${CMAKE_DL_LIBS})
|
||||
|
||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
|
||||
|
@ -18,6 +18,7 @@ const whisperParamsMock = {
|
||||
translate: true,
|
||||
no_timestamps: false,
|
||||
audio_ctx: 0,
|
||||
max_len: 0,
|
||||
};
|
||||
|
||||
describe("Run whisper.node", () => {
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "napi.h"
|
||||
#include "common.h"
|
||||
#include "common-whisper.h"
|
||||
|
||||
#include "whisper.h"
|
||||
|
||||
@ -127,192 +128,227 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
|
||||
|
||||
void cb_log_disable(enum ggml_log_level, const char *, void *) {}
|
||||
|
||||
int run(whisper_params ¶ms, std::vector<std::vector<std::string>> &result) {
|
||||
if (params.no_prints) {
|
||||
whisper_log_set(cb_log_disable, NULL);
|
||||
}
|
||||
|
||||
if (params.fname_inp.empty() && params.pcmf32.empty()) {
|
||||
fprintf(stderr, "error: no input files or audio buffer specified\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
|
||||
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams = whisper_context_default_params();
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
cparams.flash_attn = params.flash_attn;
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
// if params.pcmf32 is provided, set params.fname_inp to "buffer"
|
||||
// this is simpler than further modifications in the code
|
||||
if (!params.pcmf32.empty()) {
|
||||
fprintf(stderr, "info: using audio buffer as input\n");
|
||||
params.fname_inp.clear();
|
||||
params.fname_inp.emplace_back("buffer");
|
||||
}
|
||||
|
||||
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
||||
const auto fname_inp = params.fname_inp[f];
|
||||
const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
|
||||
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
// read the input audio file if params.pcmf32 is not provided
|
||||
if (params.pcmf32.empty()) {
|
||||
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
|
||||
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
pcmf32 = params.pcmf32;
|
||||
}
|
||||
|
||||
// print system information
|
||||
if (!params.no_prints) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
|
||||
params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
|
||||
}
|
||||
|
||||
// print some info about the processing
|
||||
if (!params.no_prints) {
|
||||
fprintf(stderr, "\n");
|
||||
if (!whisper_is_multilingual(ctx)) {
|
||||
if (params.language != "en" || params.translate) {
|
||||
params.language = "en";
|
||||
params.translate = false;
|
||||
fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d, audio_ctx = %d ...\n",
|
||||
__func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
|
||||
params.n_threads, params.n_processors,
|
||||
params.language.c_str(),
|
||||
params.translate ? "translate" : "transcribe",
|
||||
params.no_timestamps ? 0 : 1,
|
||||
params.audio_ctx);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
// run the inference
|
||||
{
|
||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
|
||||
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_progress = params.print_progress;
|
||||
wparams.print_timestamps = !params.no_timestamps;
|
||||
wparams.print_special = params.print_special;
|
||||
wparams.translate = params.translate;
|
||||
wparams.language = params.language.c_str();
|
||||
wparams.n_threads = params.n_threads;
|
||||
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
|
||||
wparams.offset_ms = params.offset_t_ms;
|
||||
wparams.duration_ms = params.duration_ms;
|
||||
|
||||
wparams.token_timestamps = params.output_wts || params.max_len > 0;
|
||||
wparams.thold_pt = params.word_thold;
|
||||
wparams.entropy_thold = params.entropy_thold;
|
||||
wparams.logprob_thold = params.logprob_thold;
|
||||
wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
|
||||
wparams.audio_ctx = params.audio_ctx;
|
||||
|
||||
wparams.greedy.best_of = params.best_of;
|
||||
wparams.beam_search.beam_size = params.beam_size;
|
||||
|
||||
wparams.initial_prompt = params.prompt.c_str();
|
||||
|
||||
wparams.no_timestamps = params.no_timestamps;
|
||||
|
||||
whisper_print_user_data user_data = { ¶ms, &pcmf32s };
|
||||
|
||||
// this callback is called on each new segment
|
||||
if (!wparams.print_realtime) {
|
||||
wparams.new_segment_callback = whisper_print_segment_callback;
|
||||
wparams.new_segment_callback_user_data = &user_data;
|
||||
}
|
||||
|
||||
// example for abort mechanism
|
||||
// in this example, we do not abort the processing, but we could if the flag is set to true
|
||||
// the callback is called before every encoder run - if it returns false, the processing is aborted
|
||||
{
|
||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||
|
||||
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||
bool is_aborted = *(bool*)user_data;
|
||||
return !is_aborted;
|
||||
};
|
||||
wparams.encoder_begin_callback_user_data = &is_aborted;
|
||||
}
|
||||
|
||||
if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
|
||||
fprintf(stderr, "failed to process audio\n");
|
||||
return 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const int n_segments = whisper_full_n_segments(ctx);
|
||||
result.resize(n_segments);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(ctx, i);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
||||
|
||||
result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
|
||||
result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
|
||||
result[i].emplace_back(text);
|
||||
}
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
class Worker : public Napi::AsyncWorker {
|
||||
class ProgressWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
Worker(Napi::Function& callback, whisper_params params)
|
||||
: Napi::AsyncWorker(callback), params(params) {}
|
||||
|
||||
void Execute() override {
|
||||
run(params, result);
|
||||
}
|
||||
|
||||
void OnOK() override {
|
||||
Napi::HandleScope scope(Env());
|
||||
Napi::Object res = Napi::Array::New(Env(), result.size());
|
||||
for (uint64_t i = 0; i < result.size(); ++i) {
|
||||
Napi::Object tmp = Napi::Array::New(Env(), 3);
|
||||
for (uint64_t j = 0; j < 3; ++j) {
|
||||
tmp[j] = Napi::String::New(Env(), result[i][j]);
|
||||
}
|
||||
res[i] = tmp;
|
||||
ProgressWorker(Napi::Function& callback, whisper_params params, Napi::Function progress_callback, Napi::Env env)
|
||||
: Napi::AsyncWorker(callback), params(params), env(env) {
|
||||
// Create thread-safe function
|
||||
if (!progress_callback.IsEmpty()) {
|
||||
tsfn = Napi::ThreadSafeFunction::New(
|
||||
env,
|
||||
progress_callback,
|
||||
"Progress Callback",
|
||||
0,
|
||||
1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
~ProgressWorker() {
|
||||
if (tsfn) {
|
||||
// Make sure to release the thread-safe function on destruction
|
||||
tsfn.Release();
|
||||
}
|
||||
}
|
||||
|
||||
void Execute() override {
|
||||
// Use custom run function with progress callback support
|
||||
run_with_progress(params, result);
|
||||
}
|
||||
|
||||
void OnOK() override {
|
||||
Napi::HandleScope scope(Env());
|
||||
Napi::Object res = Napi::Array::New(Env(), result.size());
|
||||
for (uint64_t i = 0; i < result.size(); ++i) {
|
||||
Napi::Object tmp = Napi::Array::New(Env(), 3);
|
||||
for (uint64_t j = 0; j < 3; ++j) {
|
||||
tmp[j] = Napi::String::New(Env(), result[i][j]);
|
||||
}
|
||||
res[i] = tmp;
|
||||
}
|
||||
Callback().Call({Env().Null(), res});
|
||||
}
|
||||
|
||||
// Progress callback function - using thread-safe function
|
||||
void OnProgress(int progress) {
|
||||
if (tsfn) {
|
||||
// Use thread-safe function to call JavaScript callback
|
||||
auto callback = [progress](Napi::Env env, Napi::Function jsCallback) {
|
||||
jsCallback.Call({Napi::Number::New(env, progress)});
|
||||
};
|
||||
|
||||
tsfn.BlockingCall(callback);
|
||||
}
|
||||
}
|
||||
Callback().Call({Env().Null(), res});
|
||||
}
|
||||
|
||||
private:
|
||||
whisper_params params;
|
||||
std::vector<std::vector<std::string>> result;
|
||||
whisper_params params;
|
||||
std::vector<std::vector<std::string>> result;
|
||||
Napi::Env env;
|
||||
Napi::ThreadSafeFunction tsfn;
|
||||
|
||||
// Custom run function with progress callback support
|
||||
int run_with_progress(whisper_params ¶ms, std::vector<std::vector<std::string>> &result) {
|
||||
if (params.no_prints) {
|
||||
whisper_log_set(cb_log_disable, NULL);
|
||||
}
|
||||
|
||||
if (params.fname_inp.empty() && params.pcmf32.empty()) {
|
||||
fprintf(stderr, "error: no input files or audio buffer specified\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
|
||||
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// whisper init
|
||||
struct whisper_context_params cparams = whisper_context_default_params();
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
cparams.flash_attn = params.flash_attn;
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
// If params.pcmf32 provides, set params.fname_inp as "buffer"
|
||||
if (!params.pcmf32.empty()) {
|
||||
fprintf(stderr, "info: using audio buffer as input\n");
|
||||
params.fname_inp.clear();
|
||||
params.fname_inp.emplace_back("buffer");
|
||||
}
|
||||
|
||||
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
||||
const auto fname_inp = params.fname_inp[f];
|
||||
const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
|
||||
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
// If params.pcmf32 is empty, read input audio file
|
||||
if (params.pcmf32.empty()) {
|
||||
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
|
||||
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
pcmf32 = params.pcmf32;
|
||||
}
|
||||
|
||||
// Print system info
|
||||
if (!params.no_prints) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
|
||||
params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
|
||||
}
|
||||
|
||||
// Print processing info
|
||||
if (!params.no_prints) {
|
||||
fprintf(stderr, "\n");
|
||||
if (!whisper_is_multilingual(ctx)) {
|
||||
if (params.language != "en" || params.translate) {
|
||||
params.language = "en";
|
||||
params.translate = false;
|
||||
fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d, audio_ctx = %d ...\n",
|
||||
__func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
|
||||
params.n_threads, params.n_processors,
|
||||
params.language.c_str(),
|
||||
params.translate ? "translate" : "transcribe",
|
||||
params.no_timestamps ? 0 : 1,
|
||||
params.audio_ctx);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
// Run inference
|
||||
{
|
||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
|
||||
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_progress = params.print_progress;
|
||||
wparams.print_timestamps = !params.no_timestamps;
|
||||
wparams.print_special = params.print_special;
|
||||
wparams.translate = params.translate;
|
||||
wparams.language = params.language.c_str();
|
||||
wparams.n_threads = params.n_threads;
|
||||
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
|
||||
wparams.offset_ms = params.offset_t_ms;
|
||||
wparams.duration_ms = params.duration_ms;
|
||||
|
||||
wparams.token_timestamps = params.output_wts || params.max_len > 0;
|
||||
wparams.thold_pt = params.word_thold;
|
||||
wparams.entropy_thold = params.entropy_thold;
|
||||
wparams.logprob_thold = params.logprob_thold;
|
||||
wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
|
||||
wparams.audio_ctx = params.audio_ctx;
|
||||
|
||||
wparams.greedy.best_of = params.best_of;
|
||||
wparams.beam_search.beam_size = params.beam_size;
|
||||
|
||||
wparams.initial_prompt = params.prompt.c_str();
|
||||
|
||||
wparams.no_timestamps = params.no_timestamps;
|
||||
|
||||
whisper_print_user_data user_data = { ¶ms, &pcmf32s };
|
||||
|
||||
// This callback is called for each new segment
|
||||
if (!wparams.print_realtime) {
|
||||
wparams.new_segment_callback = whisper_print_segment_callback;
|
||||
wparams.new_segment_callback_user_data = &user_data;
|
||||
}
|
||||
|
||||
// Set progress callback
|
||||
wparams.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
|
||||
ProgressWorker* worker = static_cast<ProgressWorker*>(user_data);
|
||||
worker->OnProgress(progress);
|
||||
};
|
||||
wparams.progress_callback_user_data = this;
|
||||
|
||||
// Abort mechanism example
|
||||
{
|
||||
static bool is_aborted = false; // Note: this should be atomic to avoid data races
|
||||
|
||||
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||
bool is_aborted = *(bool*)user_data;
|
||||
return !is_aborted;
|
||||
};
|
||||
wparams.encoder_begin_callback_user_data = &is_aborted;
|
||||
}
|
||||
|
||||
if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
|
||||
fprintf(stderr, "failed to process audio\n");
|
||||
return 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const int n_segments = whisper_full_n_segments(ctx);
|
||||
result.resize(n_segments);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(ctx, i);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
||||
|
||||
result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
|
||||
result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
|
||||
result[i].emplace_back(text);
|
||||
}
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
Napi::Value whisper(const Napi::CallbackInfo& info) {
|
||||
Napi::Env env = info.Env();
|
||||
if (info.Length() <= 0 || !info[0].IsObject()) {
|
||||
@ -330,6 +366,24 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
|
||||
bool no_timestamps = whisper_params.Get("no_timestamps").As<Napi::Boolean>();
|
||||
int32_t audio_ctx = whisper_params.Get("audio_ctx").As<Napi::Number>();
|
||||
bool comma_in_time = whisper_params.Get("comma_in_time").As<Napi::Boolean>();
|
||||
int32_t max_len = whisper_params.Get("max_len").As<Napi::Number>();
|
||||
|
||||
// support prompt
|
||||
std::string prompt = "";
|
||||
if (whisper_params.Has("prompt") && whisper_params.Get("prompt").IsString()) {
|
||||
prompt = whisper_params.Get("prompt").As<Napi::String>();
|
||||
}
|
||||
|
||||
// Add support for print_progress
|
||||
bool print_progress = false;
|
||||
if (whisper_params.Has("print_progress")) {
|
||||
print_progress = whisper_params.Get("print_progress").As<Napi::Boolean>();
|
||||
}
|
||||
// Add support for progress_callback
|
||||
Napi::Function progress_callback;
|
||||
if (whisper_params.Has("progress_callback") && whisper_params.Get("progress_callback").IsFunction()) {
|
||||
progress_callback = whisper_params.Get("progress_callback").As<Napi::Function>();
|
||||
}
|
||||
|
||||
Napi::Value pcmf32Value = whisper_params.Get("pcmf32");
|
||||
std::vector<float> pcmf32_vec;
|
||||
@ -352,9 +406,13 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
|
||||
params.audio_ctx = audio_ctx;
|
||||
params.pcmf32 = pcmf32_vec;
|
||||
params.comma_in_time = comma_in_time;
|
||||
params.max_len = max_len;
|
||||
params.print_progress = print_progress;
|
||||
params.prompt = prompt;
|
||||
|
||||
Napi::Function callback = info[1].As<Napi::Function>();
|
||||
Worker* worker = new Worker(callback, params);
|
||||
// Create a new Worker class with progress callback support
|
||||
ProgressWorker* worker = new ProgressWorker(callback, params, progress_callback, env);
|
||||
worker->Queue();
|
||||
return env.Undefined();
|
||||
}
|
||||
|
@ -18,6 +18,10 @@ const whisperParams = {
|
||||
translate: true,
|
||||
no_timestamps: false,
|
||||
audio_ctx: 0,
|
||||
max_len: 0,
|
||||
progress_callback: (progress) => {
|
||||
console.log(`progress: ${progress}%`);
|
||||
}
|
||||
};
|
||||
|
||||
const arguments = process.argv.slice(2);
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Benchmark the performance of whisper.cpp in the browser using WebAssembly
|
||||
|
||||
Link: https://whisper.ggerganov.com/bench/
|
||||
Link: https://ggerganov.github.io/whisper.cpp/bench.wasm
|
||||
|
||||
Terminal version: [examples/bench](/examples/bench)
|
||||
|
||||
@ -15,7 +15,17 @@ cd whisper.cpp
|
||||
mkdir build-em && cd build-em
|
||||
emcmake cmake ..
|
||||
make -j
|
||||
```
|
||||
The example can then be started by running a local HTTP server:
|
||||
```console
|
||||
python3 examples/server.py
|
||||
```
|
||||
And then opening a browser to the following URL:
|
||||
http://localhost:8000/bench.wasm
|
||||
|
||||
To run the example in a different server, you need to copy the following files
|
||||
to the server's HTTP path:
|
||||
```
|
||||
# copy the produced page to your HTTP path
|
||||
cp bin/bench.wasm/* /path/to/html/
|
||||
cp bin/libbench.worker.js /path/to/html/
|
||||
|
@ -24,6 +24,8 @@
|
||||
overflow-x: scroll;
|
||||
}
|
||||
</style>
|
||||
<script src="../coi-serviceworker.js"></script>
|
||||
<link rel="icon" href="data:,">
|
||||
</head>
|
||||
<body>
|
||||
<div id="main-container">
|
||||
@ -36,11 +38,10 @@
|
||||
<br><br>
|
||||
|
||||
<b>More examples:</b>
|
||||
<a href="https://whisper.ggerganov.com/">main</a> |
|
||||
<a href="https://whisper.ggerganov.com/bench">bench</a> |
|
||||
<a href="https://whisper.ggerganov.com/stream">stream</a> |
|
||||
<a href="https://whisper.ggerganov.com/command">command</a> |
|
||||
<a href="https://whisper.ggerganov.com/talk">talk</a> |
|
||||
<a href="../">main</a> |
|
||||
<a href="../bench.wasm/">bench</a> |
|
||||
<a href="../stream.wasm">stream</a> |
|
||||
<a href="../command.wasm/">command</a> |
|
||||
|
||||
<br><br>
|
||||
|
||||
|
@ -50,11 +50,11 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
|
||||
fprintf(stderr, " %-7s 0 - whisper\n", "");
|
||||
fprintf(stderr, " %-7s 1 - memcpy\n", "");
|
||||
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "common.h"
|
||||
#include "common-whisper.h"
|
||||
|
||||
#include "whisper.h"
|
||||
#include "grammar-parser.h"
|
||||
@ -6,12 +7,18 @@
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
@ -194,7 +201,8 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
|
||||
|
||||
static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
|
||||
fprintf(stderr, "usage: %s [options] file0 file1 ...\n", argv[0]);
|
||||
fprintf(stderr, "supported audio formats: flac, mp3, ogg, wav\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
|
||||
@ -239,7 +247,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
|
||||
fprintf(stderr, " -dl, --detect-language [%-7s] exit after automatically detecting language\n", params.detect_language ? "true" : "false");
|
||||
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt (max n_text_ctx/2 tokens)\n", params.prompt.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input audio file path\n", "");
|
||||
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
|
||||
fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
|
||||
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
|
||||
@ -916,6 +924,13 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
|
||||
static void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
#if defined(_WIN32)
|
||||
// Set the console output code page to UTF-8, while command line arguments
|
||||
// are still encoded in the system's code page. In this way, we can print
|
||||
// non-ASCII characters to the console, and access files with non-ASCII paths.
|
||||
SetConsoleOutputCP(CP_UTF8);
|
||||
#endif
|
||||
|
||||
whisper_params params;
|
||||
|
||||
// If the only argument starts with "@", read arguments line-by-line
|
||||
@ -1057,8 +1072,8 @@ int main(int argc, char ** argv) {
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
|
||||
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
|
||||
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
|
||||
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
146
examples/coi-serviceworker.js
Normal file
146
examples/coi-serviceworker.js
Normal file
@ -0,0 +1,146 @@
|
||||
/*! coi-serviceworker v0.1.7 - Guido Zuidhof and contributors, licensed under MIT */
|
||||
let coepCredentialless = false;
|
||||
if (typeof window === 'undefined') {
|
||||
self.addEventListener("install", () => self.skipWaiting());
|
||||
self.addEventListener("activate", (event) => event.waitUntil(self.clients.claim()));
|
||||
|
||||
self.addEventListener("message", (ev) => {
|
||||
if (!ev.data) {
|
||||
return;
|
||||
} else if (ev.data.type === "deregister") {
|
||||
self.registration
|
||||
.unregister()
|
||||
.then(() => {
|
||||
return self.clients.matchAll();
|
||||
})
|
||||
.then(clients => {
|
||||
clients.forEach((client) => client.navigate(client.url));
|
||||
});
|
||||
} else if (ev.data.type === "coepCredentialless") {
|
||||
coepCredentialless = ev.data.value;
|
||||
}
|
||||
});
|
||||
|
||||
self.addEventListener("fetch", function (event) {
|
||||
const r = event.request;
|
||||
if (r.cache === "only-if-cached" && r.mode !== "same-origin") {
|
||||
return;
|
||||
}
|
||||
|
||||
const request = (coepCredentialless && r.mode === "no-cors")
|
||||
? new Request(r, {
|
||||
credentials: "omit",
|
||||
})
|
||||
: r;
|
||||
event.respondWith(
|
||||
fetch(request)
|
||||
.then((response) => {
|
||||
if (response.status === 0) {
|
||||
return response;
|
||||
}
|
||||
|
||||
const newHeaders = new Headers(response.headers);
|
||||
newHeaders.set("Cross-Origin-Embedder-Policy",
|
||||
coepCredentialless ? "credentialless" : "require-corp"
|
||||
);
|
||||
if (!coepCredentialless) {
|
||||
newHeaders.set("Cross-Origin-Resource-Policy", "cross-origin");
|
||||
}
|
||||
newHeaders.set("Cross-Origin-Opener-Policy", "same-origin");
|
||||
|
||||
return new Response(response.body, {
|
||||
status: response.status,
|
||||
statusText: response.statusText,
|
||||
headers: newHeaders,
|
||||
});
|
||||
})
|
||||
.catch((e) => console.error(e))
|
||||
);
|
||||
});
|
||||
|
||||
} else {
|
||||
(() => {
|
||||
const reloadedBySelf = window.sessionStorage.getItem("coiReloadedBySelf");
|
||||
window.sessionStorage.removeItem("coiReloadedBySelf");
|
||||
const coepDegrading = (reloadedBySelf == "coepdegrade");
|
||||
|
||||
// You can customize the behavior of this script through a global `coi` variable.
|
||||
const coi = {
|
||||
shouldRegister: () => !reloadedBySelf,
|
||||
shouldDeregister: () => false,
|
||||
coepCredentialless: () => true,
|
||||
coepDegrade: () => true,
|
||||
doReload: () => window.location.reload(),
|
||||
quiet: false,
|
||||
...window.coi
|
||||
};
|
||||
|
||||
const n = navigator;
|
||||
const controlling = n.serviceWorker && n.serviceWorker.controller;
|
||||
|
||||
// Record the failure if the page is served by serviceWorker.
|
||||
if (controlling && !window.crossOriginIsolated) {
|
||||
window.sessionStorage.setItem("coiCoepHasFailed", "true");
|
||||
}
|
||||
const coepHasFailed = window.sessionStorage.getItem("coiCoepHasFailed");
|
||||
|
||||
if (controlling) {
|
||||
// Reload only on the first failure.
|
||||
const reloadToDegrade = coi.coepDegrade() && !(
|
||||
coepDegrading || window.crossOriginIsolated
|
||||
);
|
||||
n.serviceWorker.controller.postMessage({
|
||||
type: "coepCredentialless",
|
||||
value: (reloadToDegrade || coepHasFailed && coi.coepDegrade())
|
||||
? false
|
||||
: coi.coepCredentialless(),
|
||||
});
|
||||
if (reloadToDegrade) {
|
||||
!coi.quiet && console.log("Reloading page to degrade COEP.");
|
||||
window.sessionStorage.setItem("coiReloadedBySelf", "coepdegrade");
|
||||
coi.doReload("coepdegrade");
|
||||
}
|
||||
|
||||
if (coi.shouldDeregister()) {
|
||||
n.serviceWorker.controller.postMessage({ type: "deregister" });
|
||||
}
|
||||
}
|
||||
|
||||
// If we're already coi: do nothing. Perhaps it's due to this script doing its job, or COOP/COEP are
|
||||
// already set from the origin server. Also if the browser has no notion of crossOriginIsolated, just give up here.
|
||||
if (window.crossOriginIsolated !== false || !coi.shouldRegister()) return;
|
||||
|
||||
if (!window.isSecureContext) {
|
||||
!coi.quiet && console.log("COOP/COEP Service Worker not registered, a secure context is required.");
|
||||
return;
|
||||
}
|
||||
|
||||
// In some environments (e.g. Firefox private mode) this won't be available
|
||||
if (!n.serviceWorker) {
|
||||
!coi.quiet && console.error("COOP/COEP Service Worker not registered, perhaps due to private mode.");
|
||||
return;
|
||||
}
|
||||
|
||||
n.serviceWorker.register(window.document.currentScript.src).then(
|
||||
(registration) => {
|
||||
!coi.quiet && console.log("COOP/COEP Service Worker registered", registration.scope);
|
||||
|
||||
registration.addEventListener("updatefound", () => {
|
||||
!coi.quiet && console.log("Reloading page to make use of updated COOP/COEP Service Worker.");
|
||||
window.sessionStorage.setItem("coiReloadedBySelf", "updatefound");
|
||||
coi.doReload();
|
||||
});
|
||||
|
||||
// If the registration is active, but it's not controlling the page
|
||||
if (registration.active && !n.serviceWorker.controller) {
|
||||
!coi.quiet && console.log("Reloading page to make use of COOP/COEP Service Worker.");
|
||||
window.sessionStorage.setItem("coiReloadedBySelf", "notcontrolling");
|
||||
coi.doReload();
|
||||
}
|
||||
},
|
||||
(err) => {
|
||||
!coi.quiet && console.error("COOP/COEP Service Worker failed to register:", err);
|
||||
}
|
||||
);
|
||||
})();
|
||||
}
|
@ -3,7 +3,7 @@
|
||||
This is a basic Voice Assistant example that accepts voice commands from the microphone.
|
||||
It runs in fully in the browser via WebAseembly.
|
||||
|
||||
Online demo: https://whisper.ggerganov.com/command/
|
||||
Online demo: https://ggerganov.github.io/whisper.cpp/command.wasm
|
||||
|
||||
Terminal version: [examples/command](/examples/command)
|
||||
|
||||
@ -15,9 +15,18 @@ git clone https://github.com/ggerganov/whisper.cpp
|
||||
cd whisper.cpp
|
||||
mkdir build-em && cd build-em
|
||||
emcmake cmake ..
|
||||
make -j
|
||||
make -j libcommand
|
||||
```
|
||||
The example can then be started by running a local HTTP server:
|
||||
```console
|
||||
python3 examples/server.py
|
||||
```
|
||||
And then opening a browser to the following URL:
|
||||
http://localhost:8000/command.wasm/
|
||||
|
||||
# copy the produced page to your HTTP path
|
||||
To run the example in a different server, you need to copy the following files
|
||||
to the server's HTTP path:
|
||||
```
|
||||
cp bin/command.wasm/* /path/to/html/
|
||||
cp bin/libcommand.worker.js /path/to/html/
|
||||
```
|
||||
|
@ -24,6 +24,8 @@
|
||||
overflow-x: scroll;
|
||||
}
|
||||
</style>
|
||||
<script src="../coi-serviceworker.js"></script>
|
||||
<link rel="icon" href="data:,">
|
||||
</head>
|
||||
<body>
|
||||
<div id="main-container">
|
||||
@ -36,11 +38,10 @@
|
||||
<br><br>
|
||||
|
||||
<b>More examples:</b>
|
||||
<a href="https://whisper.ggerganov.com/">main</a> |
|
||||
<a href="https://whisper.ggerganov.com/bench">bench</a> |
|
||||
<a href="https://whisper.ggerganov.com/stream">stream</a> |
|
||||
<a href="https://whisper.ggerganov.com/command">command</a> |
|
||||
<a href="https://whisper.ggerganov.com/talk">talk</a> |
|
||||
<a href="../">main</a> |
|
||||
<a href="../bench.wasm/">bench</a> |
|
||||
<a href="../stream.wasm">stream</a> |
|
||||
<a href="../command.wasm/">command</a> |
|
||||
|
||||
<br><br>
|
||||
|
||||
|
@ -11,16 +11,15 @@
|
||||
#include "whisper.h"
|
||||
#include "grammar-parser.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
// command-line parameters
|
||||
struct whisper_params {
|
||||
|
@ -159,15 +159,11 @@ void audio_async::callback(uint8_t * stream, int len) {
|
||||
|
||||
memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
|
||||
memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
|
||||
|
||||
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
|
||||
m_audio_len = m_audio.size();
|
||||
} else {
|
||||
memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
|
||||
|
||||
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
|
||||
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
|
||||
}
|
||||
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
|
||||
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
172
examples/common-whisper.cpp
Normal file
172
examples/common-whisper.cpp
Normal file
@ -0,0 +1,172 @@
|
||||
#define _USE_MATH_DEFINES // for M_PI
|
||||
|
||||
#include "common-whisper.h"
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include "whisper.h"
|
||||
|
||||
// third-party utilities
|
||||
// use your favorite implementations
|
||||
#define STB_VORBIS_HEADER_ONLY
|
||||
#include "stb_vorbis.c" /* Enables Vorbis decoding. */
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define MA_NO_DEVICE_IO
|
||||
#define MA_NO_THREADING
|
||||
#define MA_NO_ENCODING
|
||||
#define MA_NO_GENERATION
|
||||
#define MA_NO_RESOURCE_MANAGER
|
||||
#define MA_NO_NODE_GRAPH
|
||||
#define MINIAUDIO_IMPLEMENTATION
|
||||
#include "miniaudio.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
|
||||
#ifdef WHISPER_FFMPEG
|
||||
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
|
||||
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
|
||||
#endif
|
||||
|
||||
bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
|
||||
std::vector<uint8_t> audio_data; // used for pipe input from stdin or ffmpeg decoding output
|
||||
|
||||
ma_result result;
|
||||
ma_decoder_config decoder_config;
|
||||
ma_decoder decoder;
|
||||
|
||||
decoder_config = ma_decoder_config_init(ma_format_f32, stereo ? 2 : 1, WHISPER_SAMPLE_RATE);
|
||||
|
||||
if (fname == "-") {
|
||||
#ifdef _WIN32
|
||||
_setmode(_fileno(stdin), _O_BINARY);
|
||||
#endif
|
||||
|
||||
uint8_t buf[1024];
|
||||
while (true)
|
||||
{
|
||||
const size_t n = fread(buf, 1, sizeof(buf), stdin);
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
audio_data.insert(audio_data.end(), buf, buf + n);
|
||||
}
|
||||
|
||||
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
||||
|
||||
fprintf(stderr, "Error: failed to open audio data from stdin (%s)\n", ma_result_description(result));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, audio_data.size());
|
||||
}
|
||||
else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
|
||||
#if defined(WHISPER_FFMPEG)
|
||||
if (ffmpeg_decode_audio(fname, audio_data) != 0) {
|
||||
fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
||||
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
|
||||
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
if ((result = ma_decoder_init_memory(fname.c_str(), fname.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
||||
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
|
||||
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
ma_uint64 frame_count;
|
||||
ma_uint64 frames_read;
|
||||
|
||||
if ((result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count)) != MA_SUCCESS) {
|
||||
fprintf(stderr, "error: failed to retrieve the length of the audio data (%s)\n", ma_result_description(result));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
pcmf32.resize(stereo ? frame_count*2 : frame_count);
|
||||
|
||||
if ((result = ma_decoder_read_pcm_frames(&decoder, pcmf32.data(), frame_count, &frames_read)) != MA_SUCCESS) {
|
||||
fprintf(stderr, "error: failed to read the frames of the audio data (%s)\n", ma_result_description(result));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stereo) {
|
||||
pcmf32s.resize(2);
|
||||
pcmf32s[0].resize(frame_count);
|
||||
pcmf32s[1].resize(frame_count);
|
||||
for (uint64_t i = 0; i < frame_count; i++) {
|
||||
pcmf32s[0][i] = pcmf32[2*i];
|
||||
pcmf32s[1][i] = pcmf32[2*i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
ma_decoder_uninit(&decoder);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// 500 -> 00:05.000
|
||||
// 6000 -> 01:00.000
|
||||
std::string to_timestamp(int64_t t, bool comma) {
|
||||
int64_t msec = t * 10;
|
||||
int64_t hr = msec / (1000 * 60 * 60);
|
||||
msec = msec - hr * (1000 * 60 * 60);
|
||||
int64_t min = msec / (1000 * 60);
|
||||
msec = msec - min * (1000 * 60);
|
||||
int64_t sec = msec / 1000;
|
||||
msec = msec - sec * 1000;
|
||||
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
|
||||
return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
|
||||
}
|
||||
|
||||
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id) {
|
||||
std::ofstream speak_file(path.c_str());
|
||||
if (speak_file.fail()) {
|
||||
fprintf(stderr, "%s: failed to open speak_file\n", __func__);
|
||||
return false;
|
||||
} else {
|
||||
speak_file.write(text.c_str(), text.size());
|
||||
speak_file.close();
|
||||
int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to speak\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef STB_VORBIS_HEADER_ONLY
|
||||
#include "stb_vorbis.c"
|
24
examples/common-whisper.h
Normal file
24
examples/common-whisper.h
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
// Read WAV audio file and store the PCM data into pcmf32
|
||||
// fname can be a buffer of WAV data instead of a filename
|
||||
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
|
||||
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
|
||||
bool read_audio_data(
|
||||
const std::string & fname,
|
||||
std::vector<float> & pcmf32,
|
||||
std::vector<std::vector<float>> & pcmf32s,
|
||||
bool stereo);
|
||||
|
||||
// convert timestamp to string, 6000 -> 01:00.000
|
||||
std::string to_timestamp(int64_t t, bool comma = false);
|
||||
|
||||
// given a timestamp get the sample
|
||||
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
|
||||
|
||||
// write text to file, and call system("command voice_id file")
|
||||
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);
|
@ -2,33 +2,18 @@
|
||||
|
||||
#include "common.h"
|
||||
|
||||
// third-party utilities
|
||||
// use your favorite implementations
|
||||
#define DR_WAV_IMPLEMENTATION
|
||||
#include "dr_wav.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <codecvt>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <locale>
|
||||
#include <codecvt>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#ifdef WHISPER_FFMPEG
|
||||
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
|
||||
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
|
||||
#endif
|
||||
|
||||
// Function to check if the next argument exists
|
||||
static std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
|
||||
if (i + 1 < argc && argv[i + 1][0] != '-') {
|
||||
@ -262,17 +247,6 @@ std::map<std::string, int32_t> json_parse(const std::string & fname) {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string convert_to_utf8(const std::wstring & input) {
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||
return converter.to_bytes(input);
|
||||
}
|
||||
|
||||
|
||||
std::wstring convert_to_wstring(const std::string & input) {
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||
return converter.from_bytes(input);
|
||||
}
|
||||
|
||||
void gpt_split_words(std::string str, std::vector<std::string>& words) {
|
||||
const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
|
||||
const std::regex re(pattern);
|
||||
@ -624,129 +598,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
|
||||
|
||||
}
|
||||
|
||||
bool is_wav_buffer(const std::string buf) {
|
||||
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
|
||||
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
|
||||
if (buf.size() < 12 || buf.substr(0, 4) != "RIFF" || buf.substr(8, 4) != "WAVE") {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t chunk_size = *reinterpret_cast<const uint32_t*>(buf.data() + 4);
|
||||
if (chunk_size + 8 != buf.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
|
||||
drwav wav;
|
||||
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
|
||||
|
||||
if (fname == "-") {
|
||||
{
|
||||
#ifdef _WIN32
|
||||
_setmode(_fileno(stdin), _O_BINARY);
|
||||
#endif
|
||||
|
||||
uint8_t buf[1024];
|
||||
while (true)
|
||||
{
|
||||
const size_t n = fread(buf, 1, sizeof(buf), stdin);
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
wav_data.insert(wav_data.end(), buf, buf + n);
|
||||
}
|
||||
}
|
||||
|
||||
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
||||
fprintf(stderr, "error: failed to open WAV file from stdin\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
|
||||
}
|
||||
else if (is_wav_buffer(fname)) {
|
||||
if (drwav_init_memory(&wav, fname.c_str(), fname.size(), nullptr) == false) {
|
||||
fprintf(stderr, "error: failed to open WAV file from fname buffer\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
|
||||
#if defined(WHISPER_FFMPEG)
|
||||
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
|
||||
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
|
||||
return false;
|
||||
}
|
||||
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
||||
fprintf(stderr, "error: failed to read wav data as wav \n");
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (wav.channels != 1 && wav.channels != 2) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
|
||||
drwav_uninit(&wav);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stereo && wav.channels != 2) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
|
||||
drwav_uninit(&wav);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
|
||||
drwav_uninit(&wav);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (wav.bitsPerSample != 16) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
|
||||
drwav_uninit(&wav);
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
|
||||
|
||||
std::vector<int16_t> pcm16;
|
||||
pcm16.resize(n*wav.channels);
|
||||
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
|
||||
drwav_uninit(&wav);
|
||||
|
||||
// convert to mono, float
|
||||
pcmf32.resize(n);
|
||||
if (wav.channels == 1) {
|
||||
for (uint64_t i = 0; i < n; i++) {
|
||||
pcmf32[i] = float(pcm16[i])/32768.0f;
|
||||
}
|
||||
} else {
|
||||
for (uint64_t i = 0; i < n; i++) {
|
||||
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
|
||||
}
|
||||
}
|
||||
|
||||
if (stereo) {
|
||||
// convert to stereo, float
|
||||
pcmf32s.resize(2);
|
||||
|
||||
pcmf32s[0].resize(n);
|
||||
pcmf32s[1].resize(n);
|
||||
for (uint64_t i = 0; i < n; i++) {
|
||||
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
|
||||
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
|
||||
const float rc = 1.0f / (2.0f * M_PI * cutoff);
|
||||
const float dt = 1.0f / sample_rate;
|
||||
@ -822,90 +673,7 @@ float similarity(const std::string & s0, const std::string & s1) {
|
||||
return 1.0f - (dist / std::max(s0.size(), s1.size()));
|
||||
}
|
||||
|
||||
bool sam_params_parse(int argc, char ** argv, sam_params & params) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "-s" || arg == "--seed") {
|
||||
params.seed = std::stoi(argv[++i]);
|
||||
} else if (arg == "-t" || arg == "--threads") {
|
||||
params.n_threads = std::stoi(argv[++i]);
|
||||
} else if (arg == "-m" || arg == "--model") {
|
||||
params.model = argv[++i];
|
||||
} else if (arg == "-i" || arg == "--inp") {
|
||||
params.fname_inp = argv[++i];
|
||||
} else if (arg == "-o" || arg == "--out") {
|
||||
params.fname_out = argv[++i];
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
sam_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
} else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
sam_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void sam_print_usage(int /*argc*/, char ** argv, const sam_params & params) {
|
||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help show this help message and exit\n");
|
||||
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n");
|
||||
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
|
||||
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
||||
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
||||
fprintf(stderr, " -i FNAME, --inp FNAME\n");
|
||||
fprintf(stderr, " input file (default: %s)\n", params.fname_inp.c_str());
|
||||
fprintf(stderr, " -o FNAME, --out FNAME\n");
|
||||
fprintf(stderr, " output file (default: %s)\n", params.fname_out.c_str());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
// 500 -> 00:05.000
|
||||
// 6000 -> 01:00.000
|
||||
std::string to_timestamp(int64_t t, bool comma) {
|
||||
int64_t msec = t * 10;
|
||||
int64_t hr = msec / (1000 * 60 * 60);
|
||||
msec = msec - hr * (1000 * 60 * 60);
|
||||
int64_t min = msec / (1000 * 60);
|
||||
msec = msec - min * (1000 * 60);
|
||||
int64_t sec = msec / 1000;
|
||||
msec = msec - sec * 1000;
|
||||
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
|
||||
return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
|
||||
}
|
||||
|
||||
bool is_file_exist(const char *fileName)
|
||||
{
|
||||
std::ifstream infile(fileName);
|
||||
bool is_file_exist(const char * filename) {
|
||||
std::ifstream infile(filename);
|
||||
return infile.good();
|
||||
}
|
||||
|
||||
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id)
|
||||
{
|
||||
std::ofstream speak_file(path.c_str());
|
||||
if (speak_file.fail()) {
|
||||
fprintf(stderr, "%s: failed to open speak_file\n", __func__);
|
||||
return false;
|
||||
} else {
|
||||
speak_file.write(text.c_str(), text.size());
|
||||
speak_file.close();
|
||||
int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to speak\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -11,8 +11,6 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#define COMMON_SAMPLE_RATE 16000
|
||||
|
||||
//
|
||||
// GPT CLI argument parsing
|
||||
//
|
||||
@ -136,19 +134,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
|
||||
// Audio utils
|
||||
//
|
||||
|
||||
// Check if a buffer is a WAV audio file
|
||||
bool is_wav_buffer(const std::string buf);
|
||||
|
||||
// Read WAV audio file and store the PCM data into pcmf32
|
||||
// fname can be a buffer of WAV data instead of a filename
|
||||
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
|
||||
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
|
||||
bool read_wav(
|
||||
const std::string & fname,
|
||||
std::vector<float> & pcmf32,
|
||||
std::vector<std::vector<float>> & pcmf32s,
|
||||
bool stereo);
|
||||
|
||||
// Write PCM data into WAV audio file
|
||||
class wav_writer {
|
||||
private:
|
||||
@ -266,23 +251,6 @@ bool vad_simple(
|
||||
// compute similarity between two strings using Levenshtein distance
|
||||
float similarity(const std::string & s0, const std::string & s1);
|
||||
|
||||
//
|
||||
// SAM argument parsing
|
||||
//
|
||||
|
||||
struct sam_params {
|
||||
int32_t seed = -1; // RNG seed
|
||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
|
||||
std::string model = "models/sam-vit-b/ggml-model-f16.bin"; // model path
|
||||
std::string fname_inp = "img.jpg";
|
||||
std::string fname_out = "img.out";
|
||||
};
|
||||
|
||||
bool sam_params_parse(int argc, char ** argv, sam_params & params);
|
||||
|
||||
void sam_print_usage(int argc, char ** argv, const sam_params & params);
|
||||
|
||||
//
|
||||
// Terminal utils
|
||||
//
|
||||
@ -330,14 +298,5 @@ const std::vector<std::string> k_colors = {
|
||||
// Other utils
|
||||
//
|
||||
|
||||
// convert timestamp to string, 6000 -> 01:00.000
|
||||
std::string to_timestamp(int64_t t, bool comma = false);
|
||||
|
||||
// given a timestamp get the sample
|
||||
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
|
||||
|
||||
// check if file exists using ifstream
|
||||
bool is_file_exist(const char *fileName);
|
||||
|
||||
// write text to file, and call system("command voice_id file")
|
||||
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);
|
||||
bool is_file_exist(const char * filename);
|
||||
|
@ -1,4 +1,6 @@
|
||||
add_executable(main ./deprecation-warning.cpp)
|
||||
add_executable(bench ./deprecation-warning.cpp)
|
||||
add_executable(stream ./deprecation-warning.cpp)
|
||||
add_executable(command ./deprecation-warning.cpp)
|
||||
if (WHISPER_SDL2)
|
||||
add_executable(stream ./deprecation-warning.cpp)
|
||||
add_executable(command ./deprecation-warning.cpp)
|
||||
endif()
|
||||
|
8815
examples/dr_wav.h
8815
examples/dr_wav.h
File diff suppressed because it is too large
Load Diff
@ -41,20 +41,17 @@ fi
|
||||
# record some raw audio
|
||||
sox -d rec.wav
|
||||
|
||||
# resample to 16kHz
|
||||
ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>&1
|
||||
|
||||
# run Whisper
|
||||
echo "Processing ..."
|
||||
${executable} -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1
|
||||
${executable} -m models/ggml-base.en.bin rec.wav -owts > /dev/null 2>&1
|
||||
|
||||
# generate Karaoke video
|
||||
echo "Generating video ..."
|
||||
source rec16.wav.wts > /dev/null 2>&1
|
||||
source rec.wav.wts > /dev/null 2>&1
|
||||
|
||||
# play the video
|
||||
echo "Playing ./rec16.wav.mp4 ..."
|
||||
ffplay -loglevel 0 -autoexit ./rec16.wav.mp4
|
||||
ffplay -loglevel 0 -autoexit ./rec.wav.mp4
|
||||
|
||||
echo "Done"
|
||||
exit 0
|
||||
|
@ -3,14 +3,15 @@
|
||||
#include "whisper.h"
|
||||
#include "json.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <deque>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
|
93468
examples/miniaudio.h
Normal file
93468
examples/miniaudio.h
Normal file
File diff suppressed because it is too large
Load Diff
39
examples/server.py
Normal file
39
examples/server.py
Normal file
@ -0,0 +1,39 @@
|
||||
import http.server
|
||||
import socketserver
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
SCRIPT_DIR = Path(__file__).parent.absolute()
|
||||
DIRECTORY = os.path.join(SCRIPT_DIR, "../build-em/bin")
|
||||
DIRECTORY = os.path.abspath(DIRECTORY)
|
||||
|
||||
class CustomHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, directory=DIRECTORY, **kwargs)
|
||||
|
||||
def do_GET(self):
|
||||
# If requesting a worker file from any subdirectory
|
||||
if '.worker.js' in self.path:
|
||||
worker_file = os.path.basename(self.path)
|
||||
worker_path = os.path.join(DIRECTORY, worker_file)
|
||||
|
||||
if os.path.exists(worker_path):
|
||||
self.path = '/' + worker_file
|
||||
|
||||
return super().do_GET()
|
||||
|
||||
def end_headers(self):
|
||||
# Add required headers for SharedArrayBuffer
|
||||
self.send_header("Cross-Origin-Opener-Policy", "same-origin")
|
||||
self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
|
||||
self.send_header("Access-Control-Allow-Origin", "*");
|
||||
super().end_headers()
|
||||
|
||||
PORT = 8000
|
||||
|
||||
with socketserver.TCPServer(("", PORT), CustomHTTPRequestHandler) as httpd:
|
||||
print(f"Serving directory '{DIRECTORY}' at http://localhost:{PORT}")
|
||||
try:
|
||||
httpd.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
print("\nServer stopped.")
|
@ -1,17 +1,18 @@
|
||||
#include "common.h"
|
||||
#include "common-whisper.h"
|
||||
|
||||
#include "whisper.h"
|
||||
#include "httplib.h"
|
||||
#include "json.hpp"
|
||||
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
@ -223,6 +224,24 @@ void check_ffmpeg_availibility() {
|
||||
}
|
||||
}
|
||||
|
||||
std::string generate_temp_filename(const std::string &prefix, const std::string &extension) {
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto now_time_t = std::chrono::system_clock::to_time_t(now);
|
||||
|
||||
static std::mt19937 rng{std::random_device{}()};
|
||||
std::uniform_int_distribution<long long> dist(0, 1e9);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << prefix
|
||||
<< "-"
|
||||
<< std::put_time(std::localtime(&now_time_t), "%Y%m%d-%H%M%S")
|
||||
<< "-"
|
||||
<< dist(rng)
|
||||
<< extension;
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
|
||||
std::ostringstream cmd_stream;
|
||||
std::string converted_filename_temp = temp_filename + "_temp.wav";
|
||||
@ -692,9 +711,7 @@ int main(int argc, char ** argv) {
|
||||
if (sparams.ffmpeg_converter) {
|
||||
// if file is not wav, convert to wav
|
||||
// write to temporary file
|
||||
//const std::string temp_filename_base = std::tmpnam(nullptr);
|
||||
const std::string temp_filename_base = "whisper-server-tmp"; // TODO: this is a hack, remove when the mutext is removed
|
||||
const std::string temp_filename = temp_filename_base + ".wav";
|
||||
const std::string temp_filename = generate_temp_filename("whisper-server", ".wav");
|
||||
std::ofstream temp_file{temp_filename, std::ios::binary};
|
||||
temp_file << audio_file.content;
|
||||
temp_file.close();
|
||||
@ -706,8 +723,8 @@ int main(int argc, char ** argv) {
|
||||
return;
|
||||
}
|
||||
|
||||
// read wav content into pcmf32
|
||||
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize))
|
||||
// read audio content into pcmf32
|
||||
if (!::read_audio_data(temp_filename, pcmf32, pcmf32s, params.diarize))
|
||||
{
|
||||
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
|
||||
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
|
||||
@ -718,10 +735,10 @@ int main(int argc, char ** argv) {
|
||||
// remove temp file
|
||||
std::remove(temp_filename.c_str());
|
||||
} else {
|
||||
if (!::read_wav(audio_file.content, pcmf32, pcmf32s, params.diarize))
|
||||
if (!::read_audio_data(audio_file.content, pcmf32, pcmf32s, params.diarize))
|
||||
{
|
||||
fprintf(stderr, "error: failed to read WAV file\n");
|
||||
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
|
||||
fprintf(stderr, "error: failed to read audio data\n");
|
||||
const std::string error_resp = "{\"error\":\"failed to read audio data\"}";
|
||||
res.set_content(error_resp, "application/json");
|
||||
return;
|
||||
}
|
||||
@ -1007,6 +1024,11 @@ int main(int argc, char ** argv) {
|
||||
// check if the model is in the file system
|
||||
});
|
||||
|
||||
svr.Get(sparams.request_path + "/health", [&](const Request &, Response &res){
|
||||
const std::string health_response = "{\"status\":\"ok\"}";
|
||||
res.set_content(health_response, "application/json");
|
||||
});
|
||||
|
||||
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
|
||||
const char fmt[] = "500 Internal Server Error\n%s";
|
||||
char buf[BUFSIZ];
|
||||
|
5584
examples/stb_vorbis.c
Normal file
5584
examples/stb_vorbis.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -13,7 +13,17 @@ cd whisper.cpp
|
||||
mkdir build-em && cd build-em
|
||||
emcmake cmake ..
|
||||
make -j
|
||||
```
|
||||
The example can then be started by running a local HTTP server:
|
||||
```console
|
||||
python3 examples/server.py
|
||||
```
|
||||
And then opening a browser to the following URL:
|
||||
http://localhost:8000/stream.wasm
|
||||
|
||||
To run the example in a different server, you need to copy the following files
|
||||
to the server's HTTP path:
|
||||
```
|
||||
# copy the produced page to your HTTP path
|
||||
cp bin/stream.wasm/* /path/to/html/
|
||||
cp bin/libstream.worker.js /path/to/html/
|
||||
|
@ -24,6 +24,8 @@
|
||||
overflow-x: scroll;
|
||||
}
|
||||
</style>
|
||||
<script src="../coi-serviceworker.js"></script>
|
||||
<link rel="icon" href="data:,">
|
||||
</head>
|
||||
<body>
|
||||
<div id="main-container">
|
||||
@ -36,11 +38,10 @@
|
||||
<br><br>
|
||||
|
||||
<b>More examples:</b>
|
||||
<a href="https://whisper.ggerganov.com/">main</a> |
|
||||
<a href="https://whisper.ggerganov.com/bench">bench</a> |
|
||||
<a href="https://whisper.ggerganov.com/stream">stream</a> |
|
||||
<a href="https://whisper.ggerganov.com/command">command</a> |
|
||||
<a href="https://whisper.ggerganov.com/talk">talk</a> |
|
||||
<a href="../">main</a> |
|
||||
<a href="../bench.wasm/">bench</a> |
|
||||
<a href="../stream.wasm">stream</a> |
|
||||
<a href="../command.wasm/">command</a> |
|
||||
|
||||
<br><br>
|
||||
|
||||
|
@ -4,15 +4,15 @@
|
||||
//
|
||||
#include "common-sdl.h"
|
||||
#include "common.h"
|
||||
#include "common-whisper.h"
|
||||
#include "whisper.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
// command-line parameters
|
||||
struct whisper_params {
|
||||
@ -23,6 +23,7 @@ struct whisper_params {
|
||||
int32_t capture_id = -1;
|
||||
int32_t max_tokens = 32;
|
||||
int32_t audio_ctx = 0;
|
||||
int32_t beam_size = -1;
|
||||
|
||||
float vad_thold = 0.6f;
|
||||
float freq_thold = 100.0f;
|
||||
@ -59,6 +60,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
|
||||
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
|
||||
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
@ -96,6 +98,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
|
||||
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
|
||||
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
|
||||
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
|
||||
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
|
||||
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
@ -241,6 +244,11 @@ int main(int argc, char ** argv) {
|
||||
|
||||
if (!use_vad) {
|
||||
while (true) {
|
||||
// handle Ctrl + C
|
||||
is_running = sdl_poll_events();
|
||||
if (!is_running) {
|
||||
break;
|
||||
}
|
||||
audio.get(params.step_ms, pcmf32_new);
|
||||
|
||||
if ((int) pcmf32_new.size() > 2*n_samples_step) {
|
||||
@ -298,7 +306,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// run the inference
|
||||
{
|
||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||
whisper_full_params wparams = whisper_full_default_params(params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.print_progress = false;
|
||||
wparams.print_special = params.print_special;
|
||||
@ -309,6 +317,7 @@ int main(int argc, char ** argv) {
|
||||
wparams.max_tokens = params.max_tokens;
|
||||
wparams.language = params.language.c_str();
|
||||
wparams.n_threads = params.n_threads;
|
||||
wparams.beam_search.beam_size = params.beam_size;
|
||||
|
||||
wparams.audio_ctx = params.audio_ctx;
|
||||
|
||||
|
@ -1,18 +1,31 @@
|
||||
if (WHISPER_SDL2)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set(TARGET whisper-talk-llama)
|
||||
add_executable(${TARGET} talk-llama.cpp
|
||||
llama.cpp
|
||||
llama-vocab.cpp
|
||||
llama-adapter.cpp
|
||||
llama-arch.cpp
|
||||
llama-batch.cpp
|
||||
llama-chat.cpp
|
||||
llama-context.cpp
|
||||
llama-cparams.cpp
|
||||
llama-grammar.cpp
|
||||
llama-hparams.cpp
|
||||
llama-impl.cpp
|
||||
llama-kv-cache.cpp
|
||||
llama-mmap.cpp
|
||||
llama-model-loader.cpp
|
||||
llama-model.cpp
|
||||
llama-quant.cpp
|
||||
llama-sampling.cpp
|
||||
llama-vocab.cpp
|
||||
unicode.cpp
|
||||
unicode-data.cpp)
|
||||
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
|
||||
|
||||
if (WHISPER_CLBLAST)
|
||||
set(CLBLAST_LIBNAME clblast)
|
||||
endif ()
|
||||
target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CLBLAST_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
||||
if(WIN32)
|
||||
# It requires Windows 8.1 or later for PrefetchVirtualMemory
|
||||
|
347
examples/talk-llama/llama-adapter.cpp
Normal file
347
examples/talk-llama/llama-adapter.cpp
Normal file
@ -0,0 +1,347 @@
|
||||
#include "llama-adapter.h"
|
||||
|
||||
#include "llama-impl.h"
|
||||
#include "llama-mmap.h"
|
||||
#include "llama-model.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
|
||||
// vec
|
||||
|
||||
struct ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
|
||||
if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return tensors[il];
|
||||
}
|
||||
|
||||
struct ggml_tensor * llama_adapter_cvec::apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
|
||||
ggml_tensor * layer_dir = tensor_for(il);
|
||||
if (layer_dir != nullptr) {
|
||||
cur = ggml_add(ctx, cur, layer_dir);
|
||||
}
|
||||
|
||||
return cur;
|
||||
}
|
||||
|
||||
bool llama_adapter_cvec::init(const llama_model & model) {
|
||||
const auto & hparams = model.hparams;
|
||||
|
||||
GGML_ASSERT(tensors.empty());
|
||||
GGML_ASSERT(ctxs.empty());
|
||||
GGML_ASSERT(bufs.empty());
|
||||
|
||||
// create a context for each buffer type
|
||||
std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
|
||||
auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * {
|
||||
auto it = ctx_map.find(buft);
|
||||
if (it == ctx_map.end()) {
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ hparams.n_layer*ggml_tensor_overhead(),
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
|
||||
ggml_context * ctx = ggml_init(params);
|
||||
if (!ctx) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ctx_map[buft] = ctx;
|
||||
ctxs.emplace_back(ctx);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
return it->second;
|
||||
};
|
||||
|
||||
// make tensors
|
||||
tensors.reserve(hparams.n_layer);
|
||||
tensors.push_back(nullptr); // there's never a tensor for layer 0
|
||||
for (size_t il = 1; il < hparams.n_layer; il++) {
|
||||
ggml_backend_buffer_type_t buft = model.select_buft(il);
|
||||
ggml_context * ctx = ctx_for_buft(buft);
|
||||
if (!ctx) {
|
||||
LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
|
||||
return false;
|
||||
}
|
||||
ggml_tensor * tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hparams.n_embd);
|
||||
tensors.push_back(tensor);
|
||||
}
|
||||
|
||||
// allocate tensors / buffers and zero
|
||||
bufs.reserve(ctx_map.size());
|
||||
for (auto it : ctx_map) {
|
||||
ggml_backend_buffer_type_t buft = it.first;
|
||||
ggml_context * ctx = it.second;
|
||||
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
|
||||
if (!buf) {
|
||||
LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
|
||||
return false;
|
||||
}
|
||||
ggml_backend_buffer_clear(buf, 0);
|
||||
bufs.emplace_back(buf);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int32_t llama_adapter_cvec::apply(
|
||||
const llama_model & model,
|
||||
const float * data,
|
||||
size_t len,
|
||||
int32_t n_embd,
|
||||
int32_t il_start,
|
||||
int32_t il_end) {
|
||||
const auto & hparams = model.hparams;
|
||||
|
||||
if (data == nullptr) {
|
||||
// disable the current control vector (but leave allocated for later)
|
||||
layer_start = -1;
|
||||
layer_end = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (n_embd != (int) hparams.n_embd) {
|
||||
LLAMA_LOG_ERROR("%s: control vector n_embd does not match model\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (tensors.empty()) {
|
||||
if (!init(model)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
layer_start = il_start;
|
||||
layer_end = il_end;
|
||||
|
||||
for (size_t il = 1; il < hparams.n_layer; il++) {
|
||||
assert(tensors[il] != nullptr);
|
||||
|
||||
const size_t off = n_embd * (il - 1); // buffer doesn't have data for layer 0, since it's never present
|
||||
if (off + n_embd <= len) {
|
||||
ggml_backend_tensor_set(tensors[il], data + off, 0, n_embd * ggml_element_size(tensors[il]));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// lora
|
||||
|
||||
llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor * w) {
|
||||
const std::string name(w->name);
|
||||
|
||||
const auto pos = ab_map.find(name);
|
||||
if (pos != ab_map.end()) {
|
||||
return &pos->second;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static void llama_adapter_lora_init_impl(struct llama_model & model, const char * path_lora, struct llama_adapter_lora & adapter) {
|
||||
LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
|
||||
|
||||
ggml_context * ctx_init;
|
||||
struct gguf_init_params meta_gguf_params = {
|
||||
/* .no_alloc = */ true,
|
||||
/* .ctx = */ &ctx_init,
|
||||
};
|
||||
|
||||
gguf_context_ptr ctx_gguf { gguf_init_from_file(path_lora, meta_gguf_params) };
|
||||
if (!ctx_gguf) {
|
||||
throw std::runtime_error("failed to load lora adapter file from " + std::string(path_lora));
|
||||
}
|
||||
|
||||
ggml_context_ptr ctx { ctx_init };
|
||||
|
||||
// check metadata
|
||||
{
|
||||
auto get_kv_str = [&](const std::string & key) -> std::string {
|
||||
int id = gguf_find_key(ctx_gguf.get(), key.c_str());
|
||||
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf.get(), id));
|
||||
};
|
||||
auto get_kv_f32 = [&](const std::string & key) -> float {
|
||||
int id = gguf_find_key(ctx_gguf.get(), key.c_str());
|
||||
return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf.get(), id);
|
||||
};
|
||||
LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
|
||||
|
||||
auto general_type = get_kv_str(llm_kv(LLM_KV_GENERAL_TYPE));
|
||||
if (general_type != "adapter") {
|
||||
throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
|
||||
}
|
||||
|
||||
auto general_arch_str = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
|
||||
auto general_arch = llm_arch_from_string(general_arch_str);
|
||||
if (general_arch != model.arch) {
|
||||
throw std::runtime_error("model arch and LoRA arch mismatch");
|
||||
}
|
||||
|
||||
auto adapter_type = get_kv_str(llm_kv(LLM_KV_ADAPTER_TYPE));
|
||||
if (adapter_type != "lora") {
|
||||
throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
|
||||
}
|
||||
|
||||
adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
|
||||
}
|
||||
|
||||
int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
|
||||
|
||||
// contexts for each buffer type
|
||||
std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
|
||||
auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * {
|
||||
auto it = ctx_map.find(buft);
|
||||
if (it == ctx_map.end()) {
|
||||
// add a new context
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ n_tensors*ggml_tensor_overhead(),
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
ggml_context * buft_ctx = ggml_init(params);
|
||||
if (!buft_ctx) {
|
||||
return nullptr;
|
||||
}
|
||||
ctx_map[buft] = buft_ctx;
|
||||
adapter.ctxs.emplace_back(buft_ctx);
|
||||
return buft_ctx;
|
||||
};
|
||||
return it->second;
|
||||
};
|
||||
|
||||
// bundle lora_a and lora_b into pairs
|
||||
std::map<std::string, llama_adapter_lora_weight> ab_map;
|
||||
auto str_endswith = [](const std::string & str, const std::string & suffix) {
|
||||
return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
|
||||
};
|
||||
|
||||
for (ggml_tensor * cur = ggml_get_first_tensor(ctx.get()); cur; cur = ggml_get_next_tensor(ctx.get(), cur)) {
|
||||
std::string name(cur->name);
|
||||
if (str_endswith(name, ".lora_a")) {
|
||||
replace_all(name, ".lora_a", "");
|
||||
if (ab_map.find(name) == ab_map.end()) {
|
||||
ab_map[name] = llama_adapter_lora_weight(cur, nullptr);
|
||||
} else {
|
||||
ab_map[name].a = cur;
|
||||
}
|
||||
} else if (str_endswith(name, ".lora_b")) {
|
||||
replace_all(name, ".lora_b", "");
|
||||
if (ab_map.find(name) == ab_map.end()) {
|
||||
ab_map[name] = llama_adapter_lora_weight(nullptr, cur);
|
||||
} else {
|
||||
ab_map[name].b = cur;
|
||||
}
|
||||
} else if (str_endswith(name, "_norm.weight")) {
|
||||
// TODO: add support for norm vector
|
||||
// for now, we don't really care because most adapters still work fine without it
|
||||
continue;
|
||||
} else {
|
||||
throw std::runtime_error("LoRA tensor '" + name + "' has unexpected suffix");
|
||||
}
|
||||
}
|
||||
|
||||
// add tensors
|
||||
for (auto & it : ab_map) {
|
||||
const std::string & name = it.first;
|
||||
llama_adapter_lora_weight & w = it.second;
|
||||
bool is_token_embd = str_endswith(name, "token_embd.weight");
|
||||
|
||||
if (!w.a || !w.b) {
|
||||
throw std::runtime_error("LoRA tensor pair for '" + name + "' is missing one component");
|
||||
}
|
||||
|
||||
// device buft and device ctx
|
||||
const auto * model_tensor = model.get_tensor(name.c_str());
|
||||
if (!model_tensor) {
|
||||
throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model (hint: maybe wrong base model?)");
|
||||
}
|
||||
|
||||
struct ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
|
||||
// validate tensor shape
|
||||
if (is_token_embd) {
|
||||
// expect B to be non-transposed, A and B are flipped; see llm_build_inp_embd()
|
||||
if (model_tensor->ne[0] != w.b->ne[1] || model_tensor->ne[1] != w.a->ne[1]) {
|
||||
throw std::runtime_error("tensor '" + name + "' has incorrect shape (hint: maybe wrong base model?)");
|
||||
}
|
||||
} else {
|
||||
if (model_tensor->ne[0] != w.a->ne[0] || model_tensor->ne[1] != w.b->ne[1]) {
|
||||
throw std::runtime_error("tensor '" + name + "' has incorrect shape (hint: maybe wrong base model?)");
|
||||
}
|
||||
if (w.a->ne[1] != w.b->ne[0]) {
|
||||
throw std::runtime_error("lora_a tensor is not transposed (hint: adapter from \"finetune\" example is no longer supported)");
|
||||
}
|
||||
}
|
||||
|
||||
// save tensor to adapter
|
||||
struct ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
|
||||
struct ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
|
||||
ggml_set_name(tensor_a, w.a->name);
|
||||
ggml_set_name(tensor_b, w.b->name);
|
||||
adapter.ab_map[name] = llama_adapter_lora_weight(tensor_a, tensor_b);
|
||||
}
|
||||
|
||||
// allocate tensors / buffers and zero
|
||||
{
|
||||
adapter.ctxs.reserve(ctx_map.size());
|
||||
adapter.bufs.reserve(ctx_map.size());
|
||||
for (auto & it : ctx_map) {
|
||||
ggml_backend_buffer_type_t buft = it.first;
|
||||
ggml_context * ctx_dev = it.second;
|
||||
ggml_backend_buffer_ptr buf { ggml_backend_alloc_ctx_tensors_from_buft(ctx_dev, buft) };
|
||||
if (!buf) {
|
||||
throw std::runtime_error("failed to allocate buffer for lora adapter\n");
|
||||
}
|
||||
LLAMA_LOG_INFO("%s: %10s LoRA buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf.get()), ggml_backend_buffer_get_size(buf.get())/1024.0/1024.0);
|
||||
adapter.bufs.emplace_back(std::move(buf));
|
||||
}
|
||||
}
|
||||
|
||||
// set tensor data
|
||||
{
|
||||
llama_file gguf_file(path_lora, "rb");
|
||||
std::vector<uint8_t> read_buf;
|
||||
auto set_tensor = [&](struct ggml_tensor * orig, struct ggml_tensor * dev) {
|
||||
size_t offs = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), gguf_find_tensor(ctx_gguf.get(), orig->name));
|
||||
size_t size = ggml_nbytes(orig);
|
||||
read_buf.resize(size);
|
||||
gguf_file.seek(offs, SEEK_SET);
|
||||
gguf_file.read_raw(read_buf.data(), size);
|
||||
ggml_backend_tensor_set(dev, read_buf.data(), 0, size);
|
||||
};
|
||||
for (auto & it : adapter.ab_map) {
|
||||
auto orig = ab_map[it.first];
|
||||
auto dev = it.second;
|
||||
set_tensor(orig.a, dev.a);
|
||||
set_tensor(orig.b, dev.b);
|
||||
}
|
||||
}
|
||||
|
||||
LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
|
||||
}
|
||||
|
||||
struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model, const char * path_lora) {
|
||||
struct llama_adapter_lora * adapter = new llama_adapter_lora();
|
||||
|
||||
try {
|
||||
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
|
||||
return adapter;
|
||||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
|
||||
|
||||
delete adapter;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void llama_adapter_lora_free(struct llama_adapter_lora * adapter) {
|
||||
delete adapter;
|
||||
}
|
74
examples/talk-llama/llama-adapter.h
Normal file
74
examples/talk-llama/llama-adapter.h
Normal file
@ -0,0 +1,74 @@
|
||||
#pragma once
|
||||
|
||||
#include "llama.h"
|
||||
|
||||
#include "ggml-cpp.h"
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// TODO: pimpl
|
||||
|
||||
//
|
||||
// llama_adapter_cvec
|
||||
//
|
||||
|
||||
struct llama_adapter_cvec {
|
||||
struct ggml_tensor * tensor_for(int il) const;
|
||||
|
||||
struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const;
|
||||
|
||||
int32_t apply(
|
||||
const llama_model & model,
|
||||
const float * data,
|
||||
size_t len,
|
||||
int32_t n_embd,
|
||||
int32_t il_start,
|
||||
int32_t il_end);
|
||||
|
||||
private:
|
||||
bool init(const llama_model & model);
|
||||
|
||||
int32_t layer_start = -1;
|
||||
int32_t layer_end = -1;
|
||||
|
||||
std::vector<ggml_context_ptr> ctxs;
|
||||
std::vector<ggml_backend_buffer_ptr> bufs;
|
||||
|
||||
std::vector<struct ggml_tensor *> tensors; // per layer
|
||||
};
|
||||
|
||||
//
|
||||
// llama_adapter_lora
|
||||
//
|
||||
|
||||
struct llama_adapter_lora_weight {
|
||||
struct ggml_tensor * a = nullptr;
|
||||
struct ggml_tensor * b = nullptr;
|
||||
|
||||
// get actual scale based on rank and alpha
|
||||
float get_scale(float alpha, float adapter_scale) const {
|
||||
const float rank = (float) b->ne[0];
|
||||
const float scale = alpha ? adapter_scale * alpha / rank : adapter_scale;
|
||||
return scale;
|
||||
}
|
||||
|
||||
llama_adapter_lora_weight() = default;
|
||||
llama_adapter_lora_weight(struct ggml_tensor * a, struct ggml_tensor * b) : a(a), b(b) {}
|
||||
};
|
||||
|
||||
struct llama_adapter_lora {
|
||||
// map tensor name to lora_a_b
|
||||
std::unordered_map<std::string, struct llama_adapter_lora_weight> ab_map;
|
||||
|
||||
std::vector<ggml_context_ptr> ctxs;
|
||||
std::vector<ggml_backend_buffer_ptr> bufs;
|
||||
|
||||
float alpha;
|
||||
|
||||
llama_adapter_lora() = default;
|
||||
~llama_adapter_lora() = default;
|
||||
|
||||
llama_adapter_lora_weight * get_weight(struct ggml_tensor * w);
|
||||
};
|
1492
examples/talk-llama/llama-arch.cpp
Normal file
1492
examples/talk-llama/llama-arch.cpp
Normal file
File diff suppressed because it is too large
Load Diff
402
examples/talk-llama/llama-arch.h
Normal file
402
examples/talk-llama/llama-arch.h
Normal file
@ -0,0 +1,402 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h" // ggml_op
|
||||
|
||||
#include <string>
|
||||
|
||||
//
|
||||
// gguf constants (sync with gguf.py)
|
||||
//
|
||||
|
||||
enum llm_arch {
|
||||
LLM_ARCH_LLAMA,
|
||||
LLM_ARCH_DECI,
|
||||
LLM_ARCH_FALCON,
|
||||
LLM_ARCH_BAICHUAN,
|
||||
LLM_ARCH_GROK,
|
||||
LLM_ARCH_GPT2,
|
||||
LLM_ARCH_GPTJ,
|
||||
LLM_ARCH_GPTNEOX,
|
||||
LLM_ARCH_MPT,
|
||||
LLM_ARCH_STARCODER,
|
||||
LLM_ARCH_REFACT,
|
||||
LLM_ARCH_BERT,
|
||||
LLM_ARCH_NOMIC_BERT,
|
||||
LLM_ARCH_JINA_BERT_V2,
|
||||
LLM_ARCH_BLOOM,
|
||||
LLM_ARCH_STABLELM,
|
||||
LLM_ARCH_QWEN,
|
||||
LLM_ARCH_QWEN2,
|
||||
LLM_ARCH_QWEN2MOE,
|
||||
LLM_ARCH_QWEN2VL,
|
||||
LLM_ARCH_PHI2,
|
||||
LLM_ARCH_PHI3,
|
||||
LLM_ARCH_PHIMOE,
|
||||
LLM_ARCH_PLAMO,
|
||||
LLM_ARCH_CODESHELL,
|
||||
LLM_ARCH_ORION,
|
||||
LLM_ARCH_INTERNLM2,
|
||||
LLM_ARCH_MINICPM,
|
||||
LLM_ARCH_MINICPM3,
|
||||
LLM_ARCH_GEMMA,
|
||||
LLM_ARCH_GEMMA2,
|
||||
LLM_ARCH_STARCODER2,
|
||||
LLM_ARCH_MAMBA,
|
||||
LLM_ARCH_XVERSE,
|
||||
LLM_ARCH_COMMAND_R,
|
||||
LLM_ARCH_COHERE2,
|
||||
LLM_ARCH_DBRX,
|
||||
LLM_ARCH_OLMO,
|
||||
LLM_ARCH_OLMO2,
|
||||
LLM_ARCH_OLMOE,
|
||||
LLM_ARCH_OPENELM,
|
||||
LLM_ARCH_ARCTIC,
|
||||
LLM_ARCH_DEEPSEEK,
|
||||
LLM_ARCH_DEEPSEEK2,
|
||||
LLM_ARCH_CHATGLM,
|
||||
LLM_ARCH_BITNET,
|
||||
LLM_ARCH_T5,
|
||||
LLM_ARCH_T5ENCODER,
|
||||
LLM_ARCH_JAIS,
|
||||
LLM_ARCH_NEMOTRON,
|
||||
LLM_ARCH_EXAONE,
|
||||
LLM_ARCH_RWKV6,
|
||||
LLM_ARCH_RWKV6QWEN2,
|
||||
LLM_ARCH_GRANITE,
|
||||
LLM_ARCH_GRANITE_MOE,
|
||||
LLM_ARCH_CHAMELEON,
|
||||
LLM_ARCH_WAVTOKENIZER_DEC,
|
||||
LLM_ARCH_UNKNOWN,
|
||||
};
|
||||
|
||||
enum llm_kv {
|
||||
LLM_KV_GENERAL_TYPE,
|
||||
LLM_KV_GENERAL_ARCHITECTURE,
|
||||
LLM_KV_GENERAL_QUANTIZATION_VERSION,
|
||||
LLM_KV_GENERAL_ALIGNMENT,
|
||||
LLM_KV_GENERAL_NAME,
|
||||
LLM_KV_GENERAL_AUTHOR,
|
||||
LLM_KV_GENERAL_VERSION,
|
||||
LLM_KV_GENERAL_URL,
|
||||
LLM_KV_GENERAL_DESCRIPTION,
|
||||
LLM_KV_GENERAL_LICENSE,
|
||||
LLM_KV_GENERAL_SOURCE_URL,
|
||||
LLM_KV_GENERAL_SOURCE_HF_REPO,
|
||||
|
||||
LLM_KV_VOCAB_SIZE,
|
||||
LLM_KV_CONTEXT_LENGTH,
|
||||
LLM_KV_EMBEDDING_LENGTH,
|
||||
LLM_KV_FEATURES_LENGTH,
|
||||
LLM_KV_BLOCK_COUNT,
|
||||
LLM_KV_LEADING_DENSE_BLOCK_COUNT,
|
||||
LLM_KV_FEED_FORWARD_LENGTH,
|
||||
LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
|
||||
LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
|
||||
LLM_KV_USE_PARALLEL_RESIDUAL,
|
||||
LLM_KV_TENSOR_DATA_LAYOUT,
|
||||
LLM_KV_EXPERT_COUNT,
|
||||
LLM_KV_EXPERT_USED_COUNT,
|
||||
LLM_KV_EXPERT_SHARED_COUNT,
|
||||
LLM_KV_EXPERT_WEIGHTS_SCALE,
|
||||
LLM_KV_EXPERT_WEIGHTS_NORM,
|
||||
LLM_KV_EXPERT_GATING_FUNC,
|
||||
LLM_KV_POOLING_TYPE,
|
||||
LLM_KV_LOGIT_SCALE,
|
||||
LLM_KV_DECODER_START_TOKEN_ID,
|
||||
LLM_KV_ATTN_LOGIT_SOFTCAPPING,
|
||||
LLM_KV_FINAL_LOGIT_SOFTCAPPING,
|
||||
LLM_KV_SWIN_NORM,
|
||||
LLM_KV_RESCALE_EVERY_N_LAYERS,
|
||||
LLM_KV_TIME_MIX_EXTRA_DIM,
|
||||
LLM_KV_TIME_DECAY_EXTRA_DIM,
|
||||
LLM_KV_RESIDUAL_SCALE,
|
||||
LLM_KV_EMBEDDING_SCALE,
|
||||
LLM_KV_TOKEN_SHIFT_COUNT,
|
||||
|
||||
LLM_KV_ATTENTION_HEAD_COUNT,
|
||||
LLM_KV_ATTENTION_HEAD_COUNT_KV,
|
||||
LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
|
||||
LLM_KV_ATTENTION_CLAMP_KQV,
|
||||
LLM_KV_ATTENTION_KEY_LENGTH,
|
||||
LLM_KV_ATTENTION_VALUE_LENGTH,
|
||||
LLM_KV_ATTENTION_LAYERNORM_EPS,
|
||||
LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
|
||||
LLM_KV_ATTENTION_GROUPNORM_EPS,
|
||||
LLM_KV_ATTENTION_GROUPNORM_GROUPS,
|
||||
LLM_KV_ATTENTION_CAUSAL,
|
||||
LLM_KV_ATTENTION_Q_LORA_RANK,
|
||||
LLM_KV_ATTENTION_KV_LORA_RANK,
|
||||
LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
|
||||
LLM_KV_ATTENTION_SLIDING_WINDOW,
|
||||
LLM_KV_ATTENTION_SCALE,
|
||||
|
||||
LLM_KV_ROPE_DIMENSION_COUNT,
|
||||
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
||||
LLM_KV_ROPE_FREQ_BASE,
|
||||
LLM_KV_ROPE_SCALE_LINEAR,
|
||||
LLM_KV_ROPE_SCALING_TYPE,
|
||||
LLM_KV_ROPE_SCALING_FACTOR,
|
||||
LLM_KV_ROPE_SCALING_ATTN_FACTOR,
|
||||
LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
|
||||
LLM_KV_ROPE_SCALING_FINETUNED,
|
||||
LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
|
||||
|
||||
LLM_KV_SPLIT_NO,
|
||||
LLM_KV_SPLIT_COUNT,
|
||||
LLM_KV_SPLIT_TENSORS_COUNT,
|
||||
|
||||
LLM_KV_SSM_INNER_SIZE,
|
||||
LLM_KV_SSM_CONV_KERNEL,
|
||||
LLM_KV_SSM_STATE_SIZE,
|
||||
LLM_KV_SSM_TIME_STEP_RANK,
|
||||
LLM_KV_SSM_DT_B_C_RMS,
|
||||
|
||||
LLM_KV_WKV_HEAD_SIZE,
|
||||
|
||||
LLM_KV_TOKENIZER_MODEL,
|
||||
LLM_KV_TOKENIZER_PRE,
|
||||
LLM_KV_TOKENIZER_LIST,
|
||||
LLM_KV_TOKENIZER_TOKEN_TYPE,
|
||||
LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
|
||||
LLM_KV_TOKENIZER_SCORES,
|
||||
LLM_KV_TOKENIZER_MERGES,
|
||||
LLM_KV_TOKENIZER_BOS_ID,
|
||||
LLM_KV_TOKENIZER_EOS_ID,
|
||||
LLM_KV_TOKENIZER_EOT_ID,
|
||||
LLM_KV_TOKENIZER_EOM_ID,
|
||||
LLM_KV_TOKENIZER_UNK_ID,
|
||||
LLM_KV_TOKENIZER_SEP_ID,
|
||||
LLM_KV_TOKENIZER_PAD_ID,
|
||||
LLM_KV_TOKENIZER_CLS_ID,
|
||||
LLM_KV_TOKENIZER_MASK_ID,
|
||||
LLM_KV_TOKENIZER_ADD_BOS,
|
||||
LLM_KV_TOKENIZER_ADD_EOS,
|
||||
LLM_KV_TOKENIZER_ADD_PREFIX,
|
||||
LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
|
||||
LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
|
||||
LLM_KV_TOKENIZER_HF_JSON,
|
||||
LLM_KV_TOKENIZER_RWKV,
|
||||
LLM_KV_TOKENIZER_CHAT_TEMPLATE,
|
||||
LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
|
||||
LLM_KV_TOKENIZER_FIM_PRE_ID,
|
||||
LLM_KV_TOKENIZER_FIM_SUF_ID,
|
||||
LLM_KV_TOKENIZER_FIM_MID_ID,
|
||||
LLM_KV_TOKENIZER_FIM_PAD_ID,
|
||||
LLM_KV_TOKENIZER_FIM_REP_ID,
|
||||
LLM_KV_TOKENIZER_FIM_SEP_ID,
|
||||
|
||||
LLM_KV_ADAPTER_TYPE,
|
||||
LLM_KV_ADAPTER_LORA_ALPHA,
|
||||
|
||||
LLM_KV_POSNET_EMBEDDING_LENGTH,
|
||||
LLM_KV_POSNET_BLOCK_COUNT,
|
||||
|
||||
LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
|
||||
LLM_KV_CONVNEXT_BLOCK_COUNT,
|
||||
|
||||
// deprecated:
|
||||
LLM_KV_TOKENIZER_PREFIX_ID,
|
||||
LLM_KV_TOKENIZER_SUFFIX_ID,
|
||||
LLM_KV_TOKENIZER_MIDDLE_ID,
|
||||
};
|
||||
|
||||
enum llm_tensor {
|
||||
LLM_TENSOR_TOKEN_EMBD,
|
||||
LLM_TENSOR_TOKEN_EMBD_NORM,
|
||||
LLM_TENSOR_TOKEN_TYPES,
|
||||
LLM_TENSOR_POS_EMBD,
|
||||
LLM_TENSOR_OUTPUT,
|
||||
LLM_TENSOR_OUTPUT_NORM,
|
||||
LLM_TENSOR_ROPE_FREQS,
|
||||
LLM_TENSOR_ROPE_FACTORS_LONG,
|
||||
LLM_TENSOR_ROPE_FACTORS_SHORT,
|
||||
LLM_TENSOR_ATTN_Q,
|
||||
LLM_TENSOR_ATTN_K,
|
||||
LLM_TENSOR_ATTN_V,
|
||||
LLM_TENSOR_ATTN_QKV,
|
||||
LLM_TENSOR_ATTN_OUT,
|
||||
LLM_TENSOR_ATTN_NORM,
|
||||
LLM_TENSOR_ATTN_NORM_2,
|
||||
LLM_TENSOR_ATTN_OUT_NORM,
|
||||
LLM_TENSOR_ATTN_POST_NORM,
|
||||
LLM_TENSOR_ATTN_ROT_EMBD,
|
||||
LLM_TENSOR_FFN_GATE_INP,
|
||||
LLM_TENSOR_FFN_GATE_INP_SHEXP,
|
||||
LLM_TENSOR_FFN_NORM,
|
||||
LLM_TENSOR_FFN_POST_NORM,
|
||||
LLM_TENSOR_FFN_GATE,
|
||||
LLM_TENSOR_FFN_DOWN,
|
||||
LLM_TENSOR_FFN_UP,
|
||||
LLM_TENSOR_FFN_ACT,
|
||||
LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
|
||||
LLM_TENSOR_FFN_GATE_EXP,
|
||||
LLM_TENSOR_FFN_UP_EXP,
|
||||
LLM_TENSOR_FFN_NORM_EXPS,
|
||||
LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
|
||||
LLM_TENSOR_FFN_GATE_EXPS,
|
||||
LLM_TENSOR_FFN_UP_EXPS,
|
||||
LLM_TENSOR_FFN_DOWN_SHEXP,
|
||||
LLM_TENSOR_FFN_GATE_SHEXP,
|
||||
LLM_TENSOR_FFN_UP_SHEXP,
|
||||
LLM_TENSOR_FFN_EXP_PROBS_B,
|
||||
LLM_TENSOR_ATTN_Q_NORM,
|
||||
LLM_TENSOR_ATTN_K_NORM,
|
||||
LLM_TENSOR_LAYER_OUT_NORM,
|
||||
LLM_TENSOR_SSM_IN,
|
||||
LLM_TENSOR_SSM_CONV1D,
|
||||
LLM_TENSOR_SSM_X,
|
||||
LLM_TENSOR_SSM_DT,
|
||||
LLM_TENSOR_SSM_A,
|
||||
LLM_TENSOR_SSM_D,
|
||||
LLM_TENSOR_SSM_OUT,
|
||||
LLM_TENSOR_TIME_MIX_W1,
|
||||
LLM_TENSOR_TIME_MIX_W2,
|
||||
LLM_TENSOR_TIME_MIX_LERP_X,
|
||||
LLM_TENSOR_TIME_MIX_LERP_W,
|
||||
LLM_TENSOR_TIME_MIX_LERP_K,
|
||||
LLM_TENSOR_TIME_MIX_LERP_V,
|
||||
LLM_TENSOR_TIME_MIX_LERP_R,
|
||||
LLM_TENSOR_TIME_MIX_LERP_G,
|
||||
LLM_TENSOR_TIME_MIX_LERP_FUSED,
|
||||
LLM_TENSOR_TIME_MIX_FIRST,
|
||||
LLM_TENSOR_TIME_MIX_DECAY,
|
||||
LLM_TENSOR_TIME_MIX_DECAY_W1,
|
||||
LLM_TENSOR_TIME_MIX_DECAY_W2,
|
||||
LLM_TENSOR_TIME_MIX_KEY,
|
||||
LLM_TENSOR_TIME_MIX_VALUE,
|
||||
LLM_TENSOR_TIME_MIX_RECEPTANCE,
|
||||
LLM_TENSOR_TIME_MIX_GATE,
|
||||
LLM_TENSOR_TIME_MIX_LN,
|
||||
LLM_TENSOR_TIME_MIX_OUTPUT,
|
||||
LLM_TENSOR_CHANNEL_MIX_LERP_K,
|
||||
LLM_TENSOR_CHANNEL_MIX_LERP_R,
|
||||
LLM_TENSOR_CHANNEL_MIX_KEY,
|
||||
LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
|
||||
LLM_TENSOR_CHANNEL_MIX_VALUE,
|
||||
LLM_TENSOR_ATTN_Q_A,
|
||||
LLM_TENSOR_ATTN_Q_B,
|
||||
LLM_TENSOR_ATTN_KV_A_MQA,
|
||||
LLM_TENSOR_ATTN_KV_B,
|
||||
LLM_TENSOR_ATTN_Q_A_NORM,
|
||||
LLM_TENSOR_ATTN_KV_A_NORM,
|
||||
LLM_TENSOR_ATTN_SUB_NORM,
|
||||
LLM_TENSOR_FFN_SUB_NORM,
|
||||
LLM_TENSOR_DEC_ATTN_NORM,
|
||||
LLM_TENSOR_DEC_ATTN_Q,
|
||||
LLM_TENSOR_DEC_ATTN_K,
|
||||
LLM_TENSOR_DEC_ATTN_V,
|
||||
LLM_TENSOR_DEC_ATTN_OUT,
|
||||
LLM_TENSOR_DEC_ATTN_REL_B,
|
||||
LLM_TENSOR_DEC_CROSS_ATTN_NORM,
|
||||
LLM_TENSOR_DEC_CROSS_ATTN_Q,
|
||||
LLM_TENSOR_DEC_CROSS_ATTN_K,
|
||||
LLM_TENSOR_DEC_CROSS_ATTN_V,
|
||||
LLM_TENSOR_DEC_CROSS_ATTN_OUT,
|
||||
LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
|
||||
LLM_TENSOR_DEC_FFN_NORM,
|
||||
LLM_TENSOR_DEC_FFN_GATE,
|
||||
LLM_TENSOR_DEC_FFN_DOWN,
|
||||
LLM_TENSOR_DEC_FFN_UP,
|
||||
LLM_TENSOR_DEC_OUTPUT_NORM,
|
||||
LLM_TENSOR_ENC_ATTN_NORM,
|
||||
LLM_TENSOR_ENC_ATTN_Q,
|
||||
LLM_TENSOR_ENC_ATTN_K,
|
||||
LLM_TENSOR_ENC_ATTN_V,
|
||||
LLM_TENSOR_ENC_ATTN_OUT,
|
||||
LLM_TENSOR_ENC_ATTN_REL_B,
|
||||
LLM_TENSOR_ENC_FFN_NORM,
|
||||
LLM_TENSOR_ENC_FFN_GATE,
|
||||
LLM_TENSOR_ENC_FFN_DOWN,
|
||||
LLM_TENSOR_ENC_FFN_UP,
|
||||
LLM_TENSOR_ENC_OUTPUT_NORM,
|
||||
LLM_TENSOR_CLS,
|
||||
LLM_TENSOR_CLS_OUT,
|
||||
LLM_TENSOR_CONV1D,
|
||||
LLM_TENSOR_CONVNEXT_DW,
|
||||
LLM_TENSOR_CONVNEXT_NORM,
|
||||
LLM_TENSOR_CONVNEXT_PW1,
|
||||
LLM_TENSOR_CONVNEXT_PW2,
|
||||
LLM_TENSOR_CONVNEXT_GAMMA,
|
||||
LLM_TENSOR_POS_NET_CONV1,
|
||||
LLM_TENSOR_POS_NET_CONV2,
|
||||
LLM_TENSOR_POS_NET_NORM,
|
||||
LLM_TENSOR_POS_NET_NORM1,
|
||||
LLM_TENSOR_POS_NET_NORM2,
|
||||
LLM_TENSOR_POS_NET_ATTN_NORM,
|
||||
LLM_TENSOR_POS_NET_ATTN_Q,
|
||||
LLM_TENSOR_POS_NET_ATTN_K,
|
||||
LLM_TENSOR_POS_NET_ATTN_V,
|
||||
LLM_TENSOR_POS_NET_ATTN_OUT,
|
||||
};
|
||||
|
||||
enum llm_tensor_layer {
|
||||
LLM_TENSOR_LAYER_INPUT,
|
||||
LLM_TENSOR_LAYER_REPEATING,
|
||||
LLM_TENSOR_LAYER_OUTPUT,
|
||||
};
|
||||
|
||||
struct LLM_KV {
|
||||
LLM_KV(llm_arch arch, const char * suffix = nullptr);
|
||||
|
||||
llm_arch arch;
|
||||
const char * suffix;
|
||||
|
||||
std::string operator()(llm_kv kv) const;
|
||||
};
|
||||
|
||||
// helper to handle gguf constants
|
||||
// usage:
|
||||
//
|
||||
// const auto tn = LLM_TN(LLM_ARCH_LLAMA);
|
||||
//
|
||||
// std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
|
||||
// std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
|
||||
// std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
|
||||
//
|
||||
struct LLM_TN_IMPL {
|
||||
const llm_arch arch;
|
||||
const llm_tensor tensor;
|
||||
const char * const suffix;
|
||||
const int bid;
|
||||
const int xid;
|
||||
|
||||
std::string str() const;
|
||||
|
||||
operator std::string() const {
|
||||
return str();
|
||||
}
|
||||
|
||||
friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
|
||||
return str == tn.str();
|
||||
}
|
||||
|
||||
friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
|
||||
return str != tn.str();
|
||||
}
|
||||
};
|
||||
|
||||
struct LLM_TN {
|
||||
LLM_TN(llm_arch arch) : arch(arch) {}
|
||||
|
||||
llm_arch arch;
|
||||
|
||||
LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
|
||||
return { arch, tensor, suffix, bid, xid };
|
||||
}
|
||||
|
||||
LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
|
||||
return { arch, tensor, nullptr, bid, xid };
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct llm_tensor_info {
|
||||
llm_tensor_layer layer;
|
||||
ggml_op op;
|
||||
};
|
||||
|
||||
const char * llm_arch_name(llm_arch arch);
|
||||
|
||||
llm_arch llm_arch_from_string(const std::string & name);
|
||||
|
||||
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
|
368
examples/talk-llama/llama-batch.cpp
Normal file
368
examples/talk-llama/llama-batch.cpp
Normal file
@ -0,0 +1,368 @@
|
||||
#include "llama-batch.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
llama_ubatch llama_sbatch::reserve_ubatch(size_t n_ubatch, bool has_embd) {
|
||||
// clear empty sequences
|
||||
// the previous ubatch is assumed to be gone,
|
||||
// so nothing should refer to values in these sequences anymore.
|
||||
for (size_t i = seq.size(); i-- > 0;) {
|
||||
if (seq[i].length == 0) {
|
||||
seq.pop_back();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ubatch_token.resize(!has_embd ? n_ubatch : 0);
|
||||
ubatch_embd.resize(has_embd ? n_embd * n_ubatch : 0);
|
||||
ubatch_pos.resize(n_ubatch);
|
||||
ubatch_n_seq_id.resize(n_ubatch);
|
||||
ubatch_seq_id.resize(n_ubatch);
|
||||
ubatch_output.resize(n_ubatch);
|
||||
llama_ubatch ubatch = {
|
||||
/*equal_seqs =*/ true,
|
||||
/*n_tokens =*/ 0,
|
||||
/*n_seq_tokens =*/ 0,
|
||||
/*n_seqs =*/ 0,
|
||||
/*token =*/ !has_embd ? ubatch_token.data() : nullptr,
|
||||
/*embd =*/ has_embd ? ubatch_embd.data() : nullptr,
|
||||
/*pos =*/ ubatch_pos.data(),
|
||||
/*n_seq_id =*/ ubatch_n_seq_id.data(),
|
||||
/*seq_id =*/ ubatch_seq_id.data(),
|
||||
/*output =*/ ubatch_output.data(),
|
||||
};
|
||||
return ubatch;
|
||||
}
|
||||
|
||||
void llama_sbatch::add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length) {
|
||||
GGML_ASSERT(batch != nullptr);
|
||||
GGML_ASSERT(length <= seq.length);
|
||||
// Can only add sequences of equal lengths to a batch,
|
||||
// otherwise it isn't clear to which sequence a token belongs
|
||||
GGML_ASSERT(seq.n_seq_id == 0 || ubatch.n_seqs == 0 || length == (size_t) ubatch.n_tokens / ubatch.n_seqs);
|
||||
GGML_ASSERT((seq.n_seq_id != 0) == ubatch.equal_seqs);
|
||||
// NOTE: loops are separated for cache-friendliness
|
||||
if (batch->token) {
|
||||
if (ubatch.equal_seqs) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ubatch.token[ubatch.n_tokens + i] = batch->token[ids[seq.offset + i]];
|
||||
}
|
||||
} else {
|
||||
// simple split
|
||||
ubatch.token = batch->token + seq.offset;
|
||||
}
|
||||
} else {
|
||||
ubatch.token = nullptr;
|
||||
}
|
||||
if (batch->embd) {
|
||||
if (ubatch.equal_seqs) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
memcpy(
|
||||
ubatch.embd + (n_embd * (ubatch.n_tokens + i)),
|
||||
batch->embd + (n_embd * ids[seq.offset + i]),
|
||||
n_embd * sizeof(float)
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// simple split
|
||||
ubatch.embd = batch->embd + (n_embd * seq.offset);
|
||||
}
|
||||
} else {
|
||||
ubatch.embd = nullptr;
|
||||
}
|
||||
if (ubatch.equal_seqs) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ubatch.pos[ubatch.n_tokens + i] = batch->pos[ids[seq.offset + i]];
|
||||
}
|
||||
} else {
|
||||
// simple split
|
||||
ubatch.pos = batch->pos + seq.offset;
|
||||
}
|
||||
if (ubatch.equal_seqs) {
|
||||
ubatch.n_seq_id[ubatch.n_seqs] = seq.n_seq_id;
|
||||
if (seq.seq_id) {
|
||||
ubatch.seq_id[ubatch.n_seqs] = seq.seq_id;
|
||||
}
|
||||
} else {
|
||||
// simple split
|
||||
if (batch->n_seq_id) {
|
||||
ubatch.n_seq_id = batch->n_seq_id + seq.offset;
|
||||
} else {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ubatch.n_seq_id[ubatch.n_seqs + i] = 1;
|
||||
}
|
||||
}
|
||||
if (batch->seq_id) {
|
||||
ubatch.seq_id = batch->seq_id + seq.offset;
|
||||
}
|
||||
}
|
||||
if (logits_all) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ubatch.output[ubatch.n_tokens + i] = 1;
|
||||
out_ids.push_back(ids[seq.offset + i]);
|
||||
}
|
||||
} else if (batch->logits) {
|
||||
if (ubatch.equal_seqs) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
size_t id = ids[seq.offset + i];
|
||||
int8_t is_output = batch->logits[id];
|
||||
ubatch.output[ubatch.n_tokens + i] = is_output;
|
||||
if (is_output) { out_ids.push_back(id); }
|
||||
}
|
||||
} else {
|
||||
// simple split
|
||||
ubatch.output = batch->logits + seq.offset;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (ubatch.output[i] != 0) { out_ids.push_back(seq.offset + i); }
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// only get last output
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
size_t id = ids[seq.offset + i];
|
||||
int8_t is_last = id == ids.size() - 1;
|
||||
ubatch.output[ubatch.n_tokens + i] = is_last;
|
||||
if (is_last) { out_ids.push_back(id); }
|
||||
}
|
||||
}
|
||||
if (ubatch.n_tokens == 0 && ubatch.n_seqs == 0) {
|
||||
ubatch.n_seq_tokens = ubatch.equal_seqs ? length : 1;
|
||||
}
|
||||
ubatch.n_tokens += length;
|
||||
ubatch.n_seqs += ubatch.equal_seqs ? 1 : length; // virtual sequences for simple splits
|
||||
seq.offset += length;
|
||||
seq.length -= length;
|
||||
n_tokens -= length;
|
||||
GGML_ASSERT(ubatch.n_tokens == ubatch.n_seq_tokens * ubatch.n_seqs);
|
||||
}
|
||||
|
||||
llama_ubatch llama_sbatch::split_simple(size_t n_ubatch) {
|
||||
n_ubatch = n_tokens < n_ubatch ? n_tokens : n_ubatch;
|
||||
llama_ubatch ubatch = reserve_ubatch(n_ubatch, /* has_embd */ batch->embd != nullptr);
|
||||
ubatch.equal_seqs = false;
|
||||
if (!seq.empty()) {
|
||||
llama_sbatch_seq & s = seq[0];
|
||||
size_t length = s.length < n_ubatch ? s.length : n_ubatch;
|
||||
GGML_ASSERT(seq.size() == 1 && s.n_seq_id == 0); // don't mix with other splits
|
||||
add_seq_to_ubatch(ubatch, s, length);
|
||||
}
|
||||
return ubatch;
|
||||
}
|
||||
|
||||
llama_ubatch llama_sbatch::split_equal(size_t n_ubatch) {
|
||||
n_ubatch = n_tokens < n_ubatch ? n_tokens : n_ubatch;
|
||||
llama_ubatch ubatch = reserve_ubatch(n_ubatch, /* has_embd */ batch->embd != nullptr);
|
||||
if (!seq.empty()) {
|
||||
size_t length = 0;
|
||||
size_t n_tokens_in_ubatch = 0;
|
||||
GGML_ASSERT(seq[0].n_seq_id > 0); // should not be mixed with simple splits
|
||||
// smallest first, because it's easier to split this way;
|
||||
// starting from the end to pop in constant time.
|
||||
for (size_t i = seq.size(); i-- > 0;) {
|
||||
llama_sbatch_seq & s = seq[i];
|
||||
GGML_ASSERT(s.length > 0);
|
||||
if (length == 0) {
|
||||
length = s.length < n_ubatch ? s.length : n_ubatch;
|
||||
}
|
||||
add_seq_to_ubatch(ubatch, s, length);
|
||||
n_tokens_in_ubatch += length;
|
||||
// shared prompts can't be mixed with any of their sequences,
|
||||
// so it's safer to compute them in their own ubatch
|
||||
if (s.n_seq_id > 1) { break; }
|
||||
// stop when there isn't enough space for another sequence
|
||||
if (length + n_tokens_in_ubatch > n_ubatch) { break; }
|
||||
}
|
||||
}
|
||||
return ubatch;
|
||||
}
|
||||
|
||||
llama_ubatch llama_sbatch::split_seq(size_t n_ubatch) {
|
||||
n_ubatch = n_tokens < n_ubatch ? n_tokens : n_ubatch;
|
||||
llama_ubatch ubatch = reserve_ubatch(n_ubatch, /* has_embd */ batch->embd != nullptr);
|
||||
if (!seq.empty()) {
|
||||
llama_sbatch_seq & s = seq[seq.size() - 1];
|
||||
size_t length = s.length < n_ubatch ? s.length : n_ubatch;
|
||||
GGML_ASSERT(s.n_seq_id > 0); // should not be mixed with simple splits
|
||||
add_seq_to_ubatch(ubatch, s, length);
|
||||
}
|
||||
return ubatch;
|
||||
}
|
||||
|
||||
void llama_sbatch::from_batch(const llama_batch & batch, size_t n_embd, bool simple_split, bool logits_all) {
|
||||
GGML_ASSERT(batch.n_tokens >= 0);
|
||||
this->batch = &batch;
|
||||
this->n_embd = n_embd;
|
||||
this->logits_all = logits_all;
|
||||
|
||||
n_tokens = batch.n_tokens;
|
||||
ids.resize(n_tokens);
|
||||
out_ids.clear();
|
||||
// TODO: reserve out_ids and seq
|
||||
|
||||
for (size_t i = 0; i < n_tokens; ++i) {
|
||||
ids[i] = i;
|
||||
}
|
||||
if (simple_split) {
|
||||
seq.resize(1);
|
||||
llama_sbatch_seq & s = seq[0];
|
||||
s.n_seq_id = 0;
|
||||
s.seq_id = nullptr;
|
||||
s.offset = 0;
|
||||
s.length = n_tokens;
|
||||
return;
|
||||
}
|
||||
std::sort(ids.begin(), ids.end(),
|
||||
[&batch](size_t a, size_t b) {
|
||||
int32_t n_seq_a = batch.n_seq_id ? batch.n_seq_id[a] : 1;
|
||||
int32_t n_seq_b = batch.n_seq_id ? batch.n_seq_id[b] : 1;
|
||||
// sort by seq_id, then by pos
|
||||
if (n_seq_a == n_seq_b) {
|
||||
if (batch.seq_id) {
|
||||
for (int32_t i = 0; i < n_seq_a; ++i) {
|
||||
llama_seq_id seq_id_a = batch.seq_id[a][i];
|
||||
llama_seq_id seq_id_b = batch.seq_id[b][i];
|
||||
// smaller seq_ids go first
|
||||
if (seq_id_a != seq_id_b) {
|
||||
return seq_id_a < seq_id_b;
|
||||
}
|
||||
}
|
||||
}
|
||||
// when all else is equal, sort by pos
|
||||
if (batch.pos) {
|
||||
return batch.pos[a] < batch.pos[b];
|
||||
}
|
||||
// no pos, sort by id
|
||||
return a < b;
|
||||
}
|
||||
// shared prompts go first
|
||||
return n_seq_a > n_seq_b;
|
||||
}
|
||||
);
|
||||
// init seq
|
||||
llama_sbatch_seq * last_seq = nullptr;
|
||||
|
||||
for (size_t i = 0; i < n_tokens; ++i) {
|
||||
const size_t bi = ids[i];
|
||||
const int32_t n_seqs = batch.n_seq_id[bi];
|
||||
llama_seq_id * seq_ids = batch.seq_id[bi];
|
||||
if (last_seq != nullptr) {
|
||||
bool same = n_seqs == last_seq->n_seq_id;
|
||||
for (int32_t j = 0; same && j < n_seqs; ++j) {
|
||||
if (seq_ids[j] != last_seq->seq_id[j]) {
|
||||
same = false;
|
||||
}
|
||||
}
|
||||
if (same) {
|
||||
last_seq->length += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
llama_sbatch_seq new_seq = {n_seqs, seq_ids, i, 1};
|
||||
seq.push_back(new_seq);
|
||||
last_seq = &seq.back();
|
||||
}
|
||||
// keep shared prompts first at the end, then sort by length descending.
|
||||
std::sort(seq.begin(), seq.end(),
|
||||
[](llama_sbatch_seq & a, llama_sbatch_seq & b) {
|
||||
if (a.n_seq_id == b.n_seq_id) {
|
||||
return a.length > b.length;
|
||||
}
|
||||
return a.n_seq_id < b.n_seq_id;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
llama_batch_allocr::llama_batch_allocr(struct llama_batch in_batch, llama_pos p0) {
|
||||
batch = in_batch;
|
||||
GGML_ASSERT(batch.n_tokens > 0);
|
||||
if (!batch.pos) {
|
||||
pos.resize(batch.n_tokens);
|
||||
for (int32_t i = 0; i < batch.n_tokens; i++) {
|
||||
pos[i] = i + p0;
|
||||
}
|
||||
batch.pos = pos.data();
|
||||
}
|
||||
if (!batch.n_seq_id) {
|
||||
n_seq_id.resize(batch.n_tokens);
|
||||
for (int32_t i = 0; i < batch.n_tokens; i++) {
|
||||
n_seq_id[i] = seq_id_0.size();
|
||||
}
|
||||
batch.n_seq_id = n_seq_id.data();
|
||||
}
|
||||
if (!batch.seq_id) {
|
||||
seq_id.resize(batch.n_tokens + 1);
|
||||
seq_id[batch.n_tokens] = NULL;
|
||||
for (int32_t i = 0; i < batch.n_tokens; i++) {
|
||||
seq_id[i] = seq_id_0.data();
|
||||
}
|
||||
batch.seq_id = seq_id.data();
|
||||
}
|
||||
if (!batch.logits) {
|
||||
logits.resize(batch.n_tokens);
|
||||
logits[logits.size() - 1] = true;
|
||||
batch.logits = logits.data();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// interface implementation
|
||||
//
|
||||
|
||||
struct llama_batch llama_batch_get_one(
|
||||
llama_token * tokens,
|
||||
int32_t n_tokens) {
|
||||
return {
|
||||
/*n_tokens =*/ n_tokens,
|
||||
/*tokens =*/ tokens,
|
||||
/*embd =*/ nullptr,
|
||||
/*pos =*/ nullptr,
|
||||
/*n_seq_id =*/ nullptr,
|
||||
/*seq_id =*/ nullptr,
|
||||
/*logits =*/ nullptr,
|
||||
};
|
||||
}
|
||||
|
||||
struct llama_batch llama_batch_init(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
|
||||
llama_batch batch = {
|
||||
/*n_tokens =*/ 0,
|
||||
/*tokens =*/ nullptr,
|
||||
/*embd =*/ nullptr,
|
||||
/*pos =*/ nullptr,
|
||||
/*n_seq_id =*/ nullptr,
|
||||
/*seq_id =*/ nullptr,
|
||||
/*logits =*/ nullptr,
|
||||
};
|
||||
|
||||
if (embd) {
|
||||
batch.embd = (float *) malloc(sizeof(float) * n_tokens_alloc * embd);
|
||||
} else {
|
||||
batch.token = (llama_token *) malloc(sizeof(llama_token) * n_tokens_alloc);
|
||||
}
|
||||
|
||||
batch.pos = (llama_pos *) malloc(sizeof(llama_pos) * n_tokens_alloc);
|
||||
batch.n_seq_id = (int32_t *) malloc(sizeof(int32_t) * n_tokens_alloc);
|
||||
batch.seq_id = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * (n_tokens_alloc + 1));
|
||||
for (int i = 0; i < n_tokens_alloc; ++i) {
|
||||
batch.seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max);
|
||||
}
|
||||
batch.seq_id[n_tokens_alloc] = nullptr;
|
||||
|
||||
batch.logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens_alloc);
|
||||
|
||||
return batch;
|
||||
}
|
||||
|
||||
void llama_batch_free(struct llama_batch batch) {
|
||||
if (batch.token) free(batch.token);
|
||||
if (batch.embd) free(batch.embd);
|
||||
if (batch.pos) free(batch.pos);
|
||||
if (batch.n_seq_id) free(batch.n_seq_id);
|
||||
if (batch.seq_id) {
|
||||
for (int i = 0; batch.seq_id[i] != nullptr; ++i) {
|
||||
free(batch.seq_id[i]);
|
||||
}
|
||||
free(batch.seq_id);
|
||||
}
|
||||
if (batch.logits) free(batch.logits);
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user