mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-07-05 17:01:38 +02:00
Compare commits
1 Commits
gg/objc
...
gg/disable
Author | SHA1 | Date | |
---|---|---|---|
ceb77363cd |
@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
|||||||
ARG CUDA_DOCKER_ARCH=all
|
ARG CUDA_DOCKER_ARCH=all
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y build-essential git cmake libsdl2-dev
|
apt-get install -y build-essential git cmake
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
|||||||
ENV GGML_CUDA=1
|
ENV GGML_CUDA=1
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y build-essential libsdl2-dev \
|
apt-get install -y build-essential \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||||
|
|
||||||
# Ref: https://stackoverflow.com/a/53464012
|
# Ref: https://stackoverflow.com/a/53464012
|
||||||
|
@ -12,7 +12,7 @@ FROM ubuntu:22.04 AS runtime
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y curl ffmpeg libsdl2-dev \
|
apt-get install -y curl ffmpeg \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||||
|
|
||||||
COPY --from=build /app /app
|
COPY --from=build /app /app
|
||||||
|
6
.github/workflows/bindings-go.yml
vendored
6
.github/workflows/bindings-go.yml
vendored
@ -13,10 +13,10 @@ jobs:
|
|||||||
ubuntu-latest:
|
ubuntu-latest:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v3
|
||||||
with:
|
with:
|
||||||
go-version: '^1.23'
|
go-version: '^1.19'
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v1
|
||||||
- run: |
|
- run: |
|
||||||
cd bindings/go
|
cd bindings/go
|
||||||
make test
|
make test
|
||||||
|
65
.github/workflows/bindings-ruby.yml
vendored
65
.github/workflows/bindings-ruby.yml
vendored
@ -3,73 +3,20 @@ on:
|
|||||||
push:
|
push:
|
||||||
paths:
|
paths:
|
||||||
- bindings/ruby/**
|
- bindings/ruby/**
|
||||||
- src/whisper.cpp
|
- whisper.h
|
||||||
- include/whisper.h
|
|
||||||
- ggml/src/ggml.c
|
|
||||||
- ggml/src/ggml-impl.h
|
|
||||||
- ggml/src/ggml-aarch64.h
|
|
||||||
- ggml/src/ggml-aarch64.c
|
|
||||||
- ggml/src/ggml-alloc.c
|
|
||||||
- ggml/src/ggml-backend-impl.h
|
|
||||||
- ggml/src/ggml-backend.cpp
|
|
||||||
- ggml/src/ggml-common.h
|
|
||||||
- ggml/src/ggml-quants.h
|
|
||||||
- ggml/src/ggml-quants.c
|
|
||||||
- ggml/src/ggml-cpu-impl.h
|
|
||||||
- ggml/src/ggml-metal.m
|
|
||||||
- ggml/src/ggml-metal.metal
|
|
||||||
- ggml/src/ggml-blas.cpp
|
|
||||||
- ggml/include/ggml.h
|
|
||||||
- ggml/include/ggml-alloc.h
|
|
||||||
- ggml/include/ggml-backend.h
|
|
||||||
- ggml/include/ggml-cuda.h
|
|
||||||
- ggml/include/ggml-kompute.h
|
|
||||||
- ggml/include/ggml-metal.h
|
|
||||||
- ggml/include/ggml-sycl.h
|
|
||||||
- ggml/include/ggml-vulkan.h
|
|
||||||
- ggml/include/ggml-blas.h
|
|
||||||
- scripts/get-flags.mk
|
|
||||||
- examples/dr_wav.h
|
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- bindings/ruby/**
|
- bindings/ruby/**
|
||||||
- src/whisper.cpp
|
- whisper.h
|
||||||
- include/whisper.h
|
|
||||||
- ggml/src/ggml.c
|
|
||||||
- ggml/src/ggml-impl.h
|
|
||||||
- ggml/src/ggml-aarch64.h
|
|
||||||
- ggml/src/ggml-aarch64.c
|
|
||||||
- ggml/src/ggml-alloc.c
|
|
||||||
- ggml/src/ggml-backend-impl.h
|
|
||||||
- ggml/src/ggml-backend.cpp
|
|
||||||
- ggml/src/ggml-common.h
|
|
||||||
- ggml/src/ggml-quants.h
|
|
||||||
- ggml/src/ggml-quants.c
|
|
||||||
- ggml/src/ggml-cpu-impl.h
|
|
||||||
- ggml/src/ggml-metal.m
|
|
||||||
- ggml/src/ggml-metal.metal
|
|
||||||
- ggml/src/ggml-blas.cpp
|
|
||||||
- ggml/include/ggml.h
|
|
||||||
- ggml/include/ggml-alloc.h
|
|
||||||
- ggml/include/ggml-backend.h
|
|
||||||
- ggml/include/ggml-cuda.h
|
|
||||||
- ggml/include/ggml-kompute.h
|
|
||||||
- ggml/include/ggml-metal.h
|
|
||||||
- ggml/include/ggml-sycl.h
|
|
||||||
- ggml/include/ggml-vulkan.h
|
|
||||||
- ggml/include/ggml-blas.h
|
|
||||||
- scripts/get-flags.mk
|
|
||||||
- examples/dr_wav.h
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest:
|
ubuntu-latest:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
defaults:
|
|
||||||
run:
|
|
||||||
working-directory: bindings/ruby
|
|
||||||
steps:
|
steps:
|
||||||
- uses: ruby/setup-ruby@v1
|
- uses: ruby/setup-ruby@v1
|
||||||
with:
|
with:
|
||||||
ruby-version: '3.0'
|
ruby-version: '3.0'
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v1
|
||||||
- run: rake test
|
- run: |
|
||||||
|
cd bindings/ruby/ext
|
||||||
|
ruby extconf.rb && make
|
||||||
|
179
.github/workflows/build.yml
vendored
179
.github/workflows/build.yml
vendored
@ -3,7 +3,6 @@ on: [push, pull_request]
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
ubuntu_image: "ubuntu:22.04"
|
ubuntu_image: "ubuntu:22.04"
|
||||||
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest:
|
ubuntu-latest:
|
||||||
@ -60,7 +59,7 @@ jobs:
|
|||||||
uses: cross-platform-actions/action@v0.24.0
|
uses: cross-platform-actions/action@v0.24.0
|
||||||
with:
|
with:
|
||||||
operating_system: freebsd
|
operating_system: freebsd
|
||||||
version: '13.3'
|
version: '13.2'
|
||||||
run: |
|
run: |
|
||||||
sudo pkg update
|
sudo pkg update
|
||||||
sudo pkg install -y gmake sdl2
|
sudo pkg install -y gmake sdl2
|
||||||
@ -309,7 +308,7 @@ jobs:
|
|||||||
- name: Build using CMake w/ OpenBLAS
|
- name: Build using CMake w/ OpenBLAS
|
||||||
shell: msys2 {0}
|
shell: msys2 {0}
|
||||||
run: |
|
run: |
|
||||||
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
cmake -B build -DGGML_OPENBLAS=ON
|
||||||
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||||
|
|
||||||
windows:
|
windows:
|
||||||
@ -383,8 +382,10 @@ jobs:
|
|||||||
sdl2: [ON]
|
sdl2: [ON]
|
||||||
include:
|
include:
|
||||||
- arch: Win32
|
- arch: Win32
|
||||||
|
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
|
||||||
s2arc: x86
|
s2arc: x86
|
||||||
- arch: x64
|
- arch: x64
|
||||||
|
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
|
||||||
s2arc: x64
|
s2arc: x64
|
||||||
- sdl2: ON
|
- sdl2: ON
|
||||||
s2ver: 2.28.5
|
s2ver: 2.28.5
|
||||||
@ -393,21 +394,17 @@ jobs:
|
|||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Export GitHub Actions cache environment variables
|
|
||||||
uses: actions/github-script@v7
|
|
||||||
with:
|
|
||||||
script: |
|
|
||||||
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
|
|
||||||
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
|
|
||||||
|
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v2
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
- name: Install OpenBLAS and pkgconfiglite
|
- name: Fetch OpenBLAS
|
||||||
if: matrix.blas == 'ON'
|
if: matrix.blas == 'ON'
|
||||||
run: |
|
run: |
|
||||||
vcpkg install --triplet=${{ matrix.s2arc }}-windows openblas
|
C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
|
||||||
choco install pkgconfiglite
|
7z x blas.zip -oblas -y
|
||||||
|
copy blas/include/cblas.h .
|
||||||
|
copy blas/include/openblas_config.h .
|
||||||
|
echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
||||||
|
|
||||||
- name: Fetch SDL2 and set SDL2_DIR
|
- name: Fetch SDL2 and set SDL2_DIR
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
@ -419,10 +416,9 @@ jobs:
|
|||||||
- name: Configure
|
- name: Configure
|
||||||
run: >
|
run: >
|
||||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||||
-DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
|
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
-DGGML_BLAS=${{ matrix.blas }}
|
-DGGML_OPENBLAS=${{ matrix.blas }}
|
||||||
-DGGML_BLAS_VENDOR=OpenBLAS
|
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
|
||||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
@ -430,9 +426,9 @@ jobs:
|
|||||||
cd ./build
|
cd ./build
|
||||||
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
||||||
|
|
||||||
- name: Copy openblas.dll
|
- name: Copy libopenblas.dll
|
||||||
if: matrix.blas == 'ON'
|
if: matrix.blas == 'ON'
|
||||||
run: copy "C:/vcpkg/packages/openblas_${{ matrix.s2arc }}-windows/bin/openblas.dll" build/bin/${{ matrix.build }}
|
run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
- name: Copy SDL2.dll
|
- name: Copy SDL2.dll
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
@ -549,9 +545,8 @@ jobs:
|
|||||||
cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
|
cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
|
||||||
mkdir models/ggml-base.en-encoder.mlmodelc
|
mkdir models/ggml-base.en-encoder.mlmodelc
|
||||||
|
|
||||||
# TODO: disabled because it fails for some reason with Github Actions
|
- name: Build objc example
|
||||||
# - name: Build objc example
|
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphonesimulator build
|
||||||
# run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphonesimulator build
|
|
||||||
|
|
||||||
- name: Build swiftui example
|
- name: Build swiftui example
|
||||||
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphonesimulator build
|
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphonesimulator build
|
||||||
@ -565,6 +560,12 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
path: whisper
|
path: whisper
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
repository: ggerganov/ggml
|
||||||
|
path: ggml
|
||||||
|
|
||||||
- name: Install Java
|
- name: Install Java
|
||||||
uses: actions/setup-java@v4
|
uses: actions/setup-java@v4
|
||||||
with:
|
with:
|
||||||
@ -583,77 +584,75 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
export PATH_TO_GGML=$PWD/ggml
|
export PATH_TO_GGML=$PWD/ggml
|
||||||
cd whisper/examples/whisper.android
|
cd whisper/examples/whisper.android
|
||||||
./gradlew assembleRelease --no-daemon
|
./gradlew assembleRelease --no-daemon -PGGML_HOME=$PATH_TO_GGML
|
||||||
|
|
||||||
# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
|
android_java:
|
||||||
# android_java:
|
runs-on: ubuntu-latest
|
||||||
# runs-on: ubuntu-latest
|
|
||||||
#
|
|
||||||
# steps:
|
|
||||||
# - name: Clone
|
|
||||||
# uses: actions/checkout@v4
|
|
||||||
#
|
|
||||||
# - name: set up JDK 11
|
|
||||||
# uses: actions/setup-java@v4
|
|
||||||
# with:
|
|
||||||
# java-version: '11'
|
|
||||||
# distribution: 'temurin'
|
|
||||||
# cache: gradle
|
|
||||||
#
|
|
||||||
# - name: Setup Android SDK
|
|
||||||
# uses: android-actions/setup-android@v3
|
|
||||||
# with:
|
|
||||||
# cmdline-tools-version: 9.0
|
|
||||||
#
|
|
||||||
# - name: Build
|
|
||||||
# run: |
|
|
||||||
# cd examples/whisper.android.java
|
|
||||||
# chmod +x ./gradlew
|
|
||||||
# ./gradlew assembleRelease
|
|
||||||
|
|
||||||
# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
|
steps:
|
||||||
# java:
|
- name: Clone
|
||||||
# needs: [ 'windows' ]
|
uses: actions/checkout@v4
|
||||||
# runs-on: windows-latest
|
|
||||||
# steps:
|
- name: set up JDK 11
|
||||||
# - uses: actions/checkout@v4
|
uses: actions/setup-java@v4
|
||||||
#
|
with:
|
||||||
# - name: Install Java
|
java-version: '11'
|
||||||
# uses: actions/setup-java@v4
|
distribution: 'temurin'
|
||||||
# with:
|
cache: gradle
|
||||||
# distribution: zulu
|
|
||||||
# java-version: 20
|
- name: Setup Android SDK
|
||||||
#
|
uses: android-actions/setup-android@v3
|
||||||
# - name: Download Windows lib
|
with:
|
||||||
# uses: actions/download-artifact@v4
|
cmdline-tools-version: 9.0
|
||||||
# with:
|
|
||||||
# name: win32-x86-64_whisper.dll
|
- name: Build
|
||||||
# path: bindings/java/build/generated/resources/main/win32-x86-64
|
run: |
|
||||||
#
|
cd examples/whisper.android.java
|
||||||
# - name: Build
|
chmod +x ./gradlew
|
||||||
# run: |
|
./gradlew assembleRelease
|
||||||
# models\download-ggml-model.cmd tiny.en
|
|
||||||
# cd bindings/java
|
java:
|
||||||
# chmod +x ./gradlew
|
needs: [ 'windows' ]
|
||||||
# ./gradlew build
|
runs-on: windows-latest
|
||||||
#
|
steps:
|
||||||
# - name: Upload jar
|
- uses: actions/checkout@v4
|
||||||
# uses: actions/upload-artifact@v4
|
|
||||||
# with:
|
- name: Install Java
|
||||||
# name: whispercpp.jar
|
uses: actions/setup-java@v4
|
||||||
# path: bindings/java/build/libs/whispercpp-*.jar
|
with:
|
||||||
#
|
distribution: zulu
|
||||||
# - name: Publish package
|
java-version: 20
|
||||||
# if: ${{ github.ref == 'refs/heads/master' }}
|
|
||||||
# uses: gradle/gradle-build-action@v2.4.2
|
- name: Download Windows lib
|
||||||
# with:
|
uses: actions/download-artifact@v4
|
||||||
# arguments: publish
|
with:
|
||||||
# build-root-directory: bindings/java
|
name: win32-x86-64_whisper.dll
|
||||||
# env:
|
path: bindings/java/build/generated/resources/main/win32-x86-64
|
||||||
# MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
|
|
||||||
# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
|
- name: Build
|
||||||
# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
|
run: |
|
||||||
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
models\download-ggml-model.cmd tiny.en
|
||||||
|
cd bindings/java
|
||||||
|
chmod +x ./gradlew
|
||||||
|
./gradlew build
|
||||||
|
|
||||||
|
- name: Upload jar
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: whispercpp.jar
|
||||||
|
path: bindings/java/build/libs/whispercpp-*.jar
|
||||||
|
|
||||||
|
- name: Publish package
|
||||||
|
if: ${{ github.ref == 'refs/heads/master' }}
|
||||||
|
uses: gradle/gradle-build-action@v2.4.2
|
||||||
|
with:
|
||||||
|
arguments: publish
|
||||||
|
build-root-directory: bindings/java
|
||||||
|
env:
|
||||||
|
MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
|
||||||
|
MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
|
||||||
|
PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||||
|
PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
||||||
|
|
||||||
quantize:
|
quantize:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
8
.github/workflows/docker.yml
vendored
8
.github/workflows/docker.yml
vendored
@ -18,9 +18,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
|
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
|
||||||
#TODO: the cuda image keeps failing - disable for now
|
- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
||||||
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
|
|
||||||
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Check out the repo
|
- name: Check out the repo
|
||||||
@ -45,7 +43,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: true
|
push: true
|
||||||
platforms: ${{ matrix.config.platform }}
|
platforms: ${{ matrix.config.platforms }}
|
||||||
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
||||||
file: ${{ matrix.config.dockerfile }}
|
file: ${{ matrix.config.dockerfile }}
|
||||||
|
|
||||||
@ -54,6 +52,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: ${{ github.event_name == 'push' }}
|
push: ${{ github.event_name == 'push' }}
|
||||||
platforms: ${{ matrix.config.platform }}
|
platforms: ${{ matrix.config.platforms }}
|
||||||
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
|
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
|
||||||
file: ${{ matrix.config.dockerfile }}
|
file: ${{ matrix.config.dockerfile }}
|
||||||
|
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,16 +1,13 @@
|
|||||||
*.o
|
*.o
|
||||||
*.a
|
*.a
|
||||||
*.d
|
|
||||||
.cache/
|
.cache/
|
||||||
.coreml/
|
.coreml/
|
||||||
.test/
|
.test/
|
||||||
.venv/
|
|
||||||
.vs/
|
.vs/
|
||||||
.vscode/
|
.vscode/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.vimspector.json
|
.vimspector.json
|
||||||
/CMakeSettings.json
|
/CMakeSettings.json
|
||||||
/talk-llama.dSYM/
|
|
||||||
|
|
||||||
build/
|
build/
|
||||||
build-*/
|
build-*/
|
||||||
@ -20,9 +17,6 @@ build-*/
|
|||||||
.swiftpm
|
.swiftpm
|
||||||
*.metallib
|
*.metallib
|
||||||
|
|
||||||
ggml-metal-embed.metal
|
|
||||||
ggml-metal-embed.metal.tmp
|
|
||||||
|
|
||||||
/main
|
/main
|
||||||
/stream
|
/stream
|
||||||
/command
|
/command
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||||
project("whisper.cpp" C CXX)
|
project("whisper.cpp" C CXX)
|
||||||
project("whisper.cpp" VERSION 1.7.2)
|
project("whisper.cpp" VERSION 1.6.2)
|
||||||
include(CheckIncludeFileCXX)
|
include(CheckIncludeFileCXX)
|
||||||
|
|
||||||
set(SOVERSION 1)
|
set(SOVERSION 1)
|
||||||
@ -120,10 +120,7 @@ whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
|||||||
# build the library
|
# build the library
|
||||||
#
|
#
|
||||||
|
|
||||||
if (NOT TARGET ggml)
|
add_subdirectory(ggml)
|
||||||
add_subdirectory(ggml)
|
|
||||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
|
||||||
endif()
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -164,6 +161,18 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
|||||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
||||||
|
|
||||||
|
install(
|
||||||
|
FILES convert-hf-to-gguf.py
|
||||||
|
PERMISSIONS
|
||||||
|
OWNER_READ
|
||||||
|
OWNER_WRITE
|
||||||
|
OWNER_EXECUTE
|
||||||
|
GROUP_READ
|
||||||
|
GROUP_EXECUTE
|
||||||
|
WORLD_READ
|
||||||
|
WORLD_EXECUTE
|
||||||
|
DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||||
|
|
||||||
configure_file(cmake/whisper.pc.in
|
configure_file(cmake/whisper.pc.in
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||||
@ONLY)
|
@ONLY)
|
||||||
|
336
Makefile
336
Makefile
@ -3,11 +3,12 @@ BUILD_TARGETS = \
|
|||||||
main \
|
main \
|
||||||
bench \
|
bench \
|
||||||
quantize \
|
quantize \
|
||||||
server
|
server \
|
||||||
|
tests/test-c.o
|
||||||
|
|
||||||
# Binaries only useful for tests
|
# Binaries only useful for tests
|
||||||
TEST_TARGETS = \
|
TEST_TARGETS = \
|
||||||
tests/test-c.o
|
tests/test-backend-ops
|
||||||
|
|
||||||
# Deprecation aliases
|
# Deprecation aliases
|
||||||
ifdef WHISPER_CUBLAS
|
ifdef WHISPER_CUBLAS
|
||||||
@ -134,18 +135,14 @@ ifdef GGML_RPC
|
|||||||
BUILD_TARGETS += rpc-server
|
BUILD_TARGETS += rpc-server
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef GGML_VULKAN
|
|
||||||
BUILD_TARGETS += vulkan-shaders-gen
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(shell sdl2-config --cflags --libs 2>/dev/null),)
|
ifeq ($(shell sdl2-config --cflags --libs 2>/dev/null),)
|
||||||
else
|
else
|
||||||
BUILD_TARGETS += \
|
BUILD_TARGETS += \
|
||||||
command \
|
command \
|
||||||
stream \
|
stream \
|
||||||
lsp \
|
lsp \
|
||||||
|
talk \
|
||||||
talk-llama
|
talk-llama
|
||||||
# talk (TODO: disalbed)
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
default: $(BUILD_TARGETS)
|
default: $(BUILD_TARGETS)
|
||||||
@ -255,9 +252,6 @@ ifdef WHISPER_DEBUG
|
|||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
MK_CPPFLAGS += -DNDEBUG
|
MK_CPPFLAGS += -DNDEBUG
|
||||||
MK_CFLAGS += -O3
|
|
||||||
MK_CXXFLAGS += -O3
|
|
||||||
MK_NVCCFLAGS += -O3
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef WHISPER_SANITIZE_THREAD
|
ifdef WHISPER_SANITIZE_THREAD
|
||||||
@ -444,17 +438,17 @@ endif
|
|||||||
else
|
else
|
||||||
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
|
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
|
||||||
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
|
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
|
||||||
endif # RISCV
|
endif
|
||||||
|
|
||||||
ifndef GGML_NO_ACCELERATE
|
ifndef GGML_NO_ACCELERATE
|
||||||
# Mac OS - include Accelerate framework.
|
# Mac OS - include Accelerate framework.
|
||||||
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
|
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
||||||
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
||||||
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
||||||
MK_LDFLAGS += -framework Accelerate
|
MK_LDFLAGS += -framework Accelerate
|
||||||
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
OBJ_GGML += ggml/src/ggml-blas.o
|
||||||
endif
|
endif
|
||||||
endif # GGML_NO_ACCELERATE
|
endif # GGML_NO_ACCELERATE
|
||||||
|
|
||||||
@ -464,38 +458,29 @@ ifndef GGML_NO_OPENMP
|
|||||||
MK_CXXFLAGS += -fopenmp
|
MK_CXXFLAGS += -fopenmp
|
||||||
endif # GGML_NO_OPENMP
|
endif # GGML_NO_OPENMP
|
||||||
|
|
||||||
ifdef WHISPER_COREML
|
|
||||||
MK_CXXFLAGS += -DWHISPER_USE_COREML
|
|
||||||
LDFLAGS += -framework Foundation -framework CoreML
|
|
||||||
|
|
||||||
ifdef WHISPER_COREML_ALLOW_FALLBACK
|
|
||||||
MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
|
|
||||||
endif
|
|
||||||
endif # WHISPER_COREML
|
|
||||||
|
|
||||||
ifdef GGML_OPENBLAS
|
ifdef GGML_OPENBLAS
|
||||||
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
||||||
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
||||||
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
||||||
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
OBJ_GGML += ggml/src/ggml-blas.o
|
||||||
endif # GGML_OPENBLAS
|
endif # GGML_OPENBLAS
|
||||||
|
|
||||||
ifdef GGML_OPENBLAS64
|
ifdef GGML_OPENBLAS64
|
||||||
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
||||||
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
||||||
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
||||||
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
OBJ_GGML += ggml/src/ggml-blas.o
|
||||||
endif # GGML_OPENBLAS64
|
endif # GGML_OPENBLAS64
|
||||||
|
|
||||||
ifdef GGML_BLIS
|
ifdef GGML_BLIS
|
||||||
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
||||||
MK_LDFLAGS += -lblis -L/usr/local/lib
|
MK_LDFLAGS += -lblis -L/usr/local/lib
|
||||||
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
OBJ_GGML += ggml/src/ggml-blas.o
|
||||||
endif # GGML_BLIS
|
endif # GGML_BLIS
|
||||||
|
|
||||||
ifdef GGML_RPC
|
ifdef GGML_RPC
|
||||||
MK_CPPFLAGS += -DGGML_USE_RPC
|
MK_CPPFLAGS += -DGGML_USE_RPC
|
||||||
OBJ_GGML += ggml/src/ggml-rpc/ggml-rpc.o
|
OBJ_GGML += ggml/src/ggml-rpc.o
|
||||||
endif # GGML_RPC
|
endif # GGML_RPC
|
||||||
|
|
||||||
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
|
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
|
||||||
@ -516,15 +501,16 @@ ifdef GGML_CUDA
|
|||||||
CUDA_PATH ?= /usr/local/cuda
|
CUDA_PATH ?= /usr/local/cuda
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
|
MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
|
||||||
#MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
||||||
MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
|
||||||
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
|
||||||
MK_NVCCFLAGS += -use_fast_math
|
MK_NVCCFLAGS += -use_fast_math
|
||||||
|
|
||||||
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
|
OBJ_GGML += ggml/src/ggml-cuda.o
|
||||||
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
||||||
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
||||||
|
|
||||||
|
OBJ_WHISPER += src/whisper-mel-cuda.o
|
||||||
|
|
||||||
ifdef WHISPER_FATAL_WARNINGS
|
ifdef WHISPER_FATAL_WARNINGS
|
||||||
MK_NVCCFLAGS += -Werror all-warnings
|
MK_NVCCFLAGS += -Werror all-warnings
|
||||||
endif # WHISPER_FATAL_WARNINGS
|
endif # WHISPER_FATAL_WARNINGS
|
||||||
@ -624,21 +610,25 @@ ggml/src/ggml-cuda/%.o: \
|
|||||||
ggml/src/ggml-cuda/common.cuh
|
ggml/src/ggml-cuda/common.cuh
|
||||||
$(NVCC_COMPILE)
|
$(NVCC_COMPILE)
|
||||||
|
|
||||||
ggml/src/ggml-cuda/ggml-cuda.o: \
|
ggml/src/ggml-cuda.o: \
|
||||||
ggml/src/ggml-cuda/ggml-cuda.cu \
|
ggml/src/ggml-cuda.cu \
|
||||||
ggml/include/ggml-cuda.h \
|
|
||||||
ggml/include/ggml.h \
|
ggml/include/ggml.h \
|
||||||
ggml/include/ggml-backend.h \
|
ggml/include/ggml-backend.h \
|
||||||
|
ggml/include/ggml-cuda.h \
|
||||||
ggml/src/ggml-backend-impl.h \
|
ggml/src/ggml-backend-impl.h \
|
||||||
ggml/src/ggml-common.h \
|
ggml/src/ggml-common.h \
|
||||||
$(wildcard ggml/src/ggml-cuda/*.cuh)
|
$(wildcard ggml/src/ggml-cuda/*.cuh)
|
||||||
$(NVCC_COMPILE)
|
$(NVCC_COMPILE)
|
||||||
|
|
||||||
|
src/whisper-mel-cuda.o: src/whisper-mel-cuda.cu src/whisper-mel-cuda.hpp
|
||||||
|
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||||
|
|
||||||
endif # GGML_CUDA
|
endif # GGML_CUDA
|
||||||
|
|
||||||
ifdef GGML_VULKAN
|
ifdef GGML_VULKAN
|
||||||
MK_CPPFLAGS += -DGGML_USE_VULKAN
|
MK_CPPFLAGS += -DGGML_USE_VULKAN
|
||||||
MK_LDFLAGS += $(shell pkg-config --libs vulkan)
|
MK_LDFLAGS += -lvulkan
|
||||||
OBJ_GGML += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o
|
OBJ_GGML += ggml/src/ggml-vulkan.o
|
||||||
|
|
||||||
ifdef GGML_VULKAN_CHECK_RESULTS
|
ifdef GGML_VULKAN_CHECK_RESULTS
|
||||||
MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
|
MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
|
||||||
@ -652,10 +642,6 @@ ifdef GGML_VULKAN_MEMORY_DEBUG
|
|||||||
MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
|
MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef GGML_VULKAN_PERF
|
|
||||||
MK_CPPFLAGS += -DGGML_VULKAN_PERF
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef GGML_VULKAN_VALIDATE
|
ifdef GGML_VULKAN_VALIDATE
|
||||||
MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
|
MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
|
||||||
endif
|
endif
|
||||||
@ -664,28 +650,10 @@ ifdef GGML_VULKAN_RUN_TESTS
|
|||||||
MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
|
MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
GLSLC_CMD = glslc
|
ggml/src/ggml-vulkan.o: \
|
||||||
_ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
|
ggml/src/ggml-vulkan.cpp \
|
||||||
_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
|
ggml/include/ggml-vulkan.h
|
||||||
_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
_ggml_vk_input_dir = ggml/src/vulkan-shaders
|
|
||||||
_ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp)
|
|
||||||
|
|
||||||
ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
|
|
||||||
$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@
|
|
||||||
|
|
||||||
$(_ggml_vk_header): $(_ggml_vk_source)
|
|
||||||
|
|
||||||
$(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
|
|
||||||
$(_ggml_vk_genshaders_cmd) \
|
|
||||||
--glslc $(GLSLC_CMD) \
|
|
||||||
--input-dir $(_ggml_vk_input_dir) \
|
|
||||||
--target-hpp $(_ggml_vk_header) \
|
|
||||||
--target-cpp $(_ggml_vk_source)
|
|
||||||
|
|
||||||
vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
|
|
||||||
$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
|
|
||||||
|
|
||||||
endif # GGML_VULKAN
|
endif # GGML_VULKAN
|
||||||
|
|
||||||
ifdef GGML_HIPBLAS
|
ifdef GGML_HIPBLAS
|
||||||
@ -751,43 +719,50 @@ endif # GGML_HIPBLAS
|
|||||||
ifdef GGML_METAL
|
ifdef GGML_METAL
|
||||||
MK_CPPFLAGS += -DGGML_USE_METAL
|
MK_CPPFLAGS += -DGGML_USE_METAL
|
||||||
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
||||||
OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o
|
OBJ_GGML += ggml/src/ggml-metal.o
|
||||||
ifdef GGML_METAL_NDEBUG
|
ifdef GGML_METAL_NDEBUG
|
||||||
MK_CPPFLAGS += -DGGML_METAL_NDEBUG
|
MK_CPPFLAGS += -DGGML_METAL_NDEBUG
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef GGML_METAL_EMBED_LIBRARY
|
ifdef GGML_METAL_EMBED_LIBRARY
|
||||||
MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
|
MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
|
||||||
OBJ_GGML += ggml/src/ggml-metal/ggml-metal-embed.o
|
OBJ_GGML += ggml/src/ggml-metal-embed.o
|
||||||
endif
|
endif
|
||||||
endif # GGML_METAL
|
endif # GGML_METAL
|
||||||
|
|
||||||
|
ifdef WHISPER_COREML
|
||||||
|
MK_CXXFLAGS += -DWHISPER_USE_COREML
|
||||||
|
LDFLAGS += -framework Foundation -framework CoreML
|
||||||
|
|
||||||
|
ifdef WHISPER_COREML_ALLOW_FALLBACK
|
||||||
|
MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
# ===
|
||||||
|
|
||||||
ifdef GGML_METAL
|
ifdef GGML_METAL
|
||||||
ggml/src/ggml-metal/ggml-metal.o: \
|
ggml/src/ggml-metal.o: \
|
||||||
ggml/src/ggml-metal/ggml-metal.m \
|
ggml/src/ggml-metal.m \
|
||||||
ggml/src/ggml-metal/ggml-metal-impl.h \
|
|
||||||
ggml/include/ggml-metal.h \
|
ggml/include/ggml-metal.h \
|
||||||
ggml/include/ggml.h
|
ggml/include/ggml.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ifdef GGML_METAL_EMBED_LIBRARY
|
ifdef GGML_METAL_EMBED_LIBRARY
|
||||||
ggml/src/ggml-metal/ggml-metal-embed.o: \
|
ggml/src/ggml-metal-embed.o: \
|
||||||
ggml/src/ggml-metal/ggml-metal.metal \
|
ggml/src/ggml-metal.metal \
|
||||||
ggml/src/ggml-metal/ggml-metal-impl.h \
|
|
||||||
ggml/src/ggml-common.h
|
ggml/src/ggml-common.h
|
||||||
@echo "Embedding Metal library"
|
@echo "Embedding Metal library"
|
||||||
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
|
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
|
||||||
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
|
$(eval TEMP_ASSEMBLY=$(shell mktemp))
|
||||||
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
|
||||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
|
||||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
|
||||||
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
|
||||||
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
|
||||||
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
|
||||||
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@$(AS) $(TEMP_ASSEMBLY) -o $@
|
||||||
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
|
@rm -f ${TEMP_ASSEMBLY}
|
||||||
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
|
|
||||||
@rmdir ${TEMP_ASSEMBLY}
|
|
||||||
endif
|
endif
|
||||||
endif # GGML_METAL
|
endif # GGML_METAL
|
||||||
|
|
||||||
@ -803,17 +778,9 @@ endif
|
|||||||
|
|
||||||
OBJ_GGML += \
|
OBJ_GGML += \
|
||||||
ggml/src/ggml.o \
|
ggml/src/ggml.o \
|
||||||
ggml/src/ggml-aarch64.o \
|
|
||||||
ggml/src/ggml-alloc.o \
|
ggml/src/ggml-alloc.o \
|
||||||
ggml/src/ggml-backend.o \
|
ggml/src/ggml-backend.o \
|
||||||
ggml/src/ggml-backend-reg.o \
|
ggml/src/ggml-quants.o
|
||||||
ggml/src/ggml-opt.o \
|
|
||||||
ggml/src/ggml-quants.o \
|
|
||||||
ggml/src/ggml-threading.o \
|
|
||||||
ggml/src/ggml-cpu/ggml-cpu.o \
|
|
||||||
ggml/src/ggml-cpu/ggml-cpu-cpp.o \
|
|
||||||
ggml/src/ggml-cpu/ggml-cpu-aarch64.o \
|
|
||||||
ggml/src/ggml-cpu/ggml-cpu-quants.o
|
|
||||||
|
|
||||||
OBJ_WHISPER += \
|
OBJ_WHISPER += \
|
||||||
src/whisper.o
|
src/whisper.o
|
||||||
@ -918,64 +885,101 @@ endif
|
|||||||
# Build libraries
|
# Build libraries
|
||||||
#
|
#
|
||||||
|
|
||||||
LIB_GGML = libggml.so
|
# ggml
|
||||||
LIB_GGML_S = libggml.a
|
|
||||||
|
|
||||||
LIB_LLAMA = libllama.so
|
ggml/src/ggml.o: \
|
||||||
LIB_LLAMA_S = libllama.a
|
ggml/src/ggml.c \
|
||||||
|
ggml/include/ggml.h
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
LIB_COMMON = libcommon.so
|
ggml/src/ggml-alloc.o: \
|
||||||
LIB_COMMON_S = libcommon.a
|
ggml/src/ggml-alloc.c \
|
||||||
|
|
||||||
LIB_COMMON_SDL = libcommon-sdl.so
|
|
||||||
LIB_COMMON_SDL_S = libcommon-sdl.a
|
|
||||||
|
|
||||||
# Targets
|
|
||||||
BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)
|
|
||||||
|
|
||||||
# Dependency files
|
|
||||||
DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
|
|
||||||
|
|
||||||
# Default target
|
|
||||||
all: $(BUILD_TARGETS)
|
|
||||||
|
|
||||||
# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
|
|
||||||
# g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
|
|
||||||
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
|
|
||||||
ggml/src/ggml-cpu/ggml-cpu.cpp \
|
|
||||||
ggml/include/ggml-backend.h \
|
|
||||||
ggml/include/ggml.h \
|
ggml/include/ggml.h \
|
||||||
ggml/include/ggml-alloc.h \
|
ggml/include/ggml-alloc.h
|
||||||
ggml/src/ggml-backend-impl.h \
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
ggml/include/ggml-cpu.h \
|
|
||||||
ggml/src/ggml-impl.h
|
ggml/src/ggml-backend.o: \
|
||||||
|
ggml/src/ggml-backend.c \
|
||||||
|
ggml/include/ggml.h \
|
||||||
|
ggml/include/ggml-backend.h
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
ggml/src/ggml-quants.o: \
|
||||||
|
ggml/src/ggml-quants.c \
|
||||||
|
ggml/include/ggml.h \
|
||||||
|
ggml/src/ggml-quants.h \
|
||||||
|
ggml/src/ggml-common.h
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
ggml/src/ggml-blas.o: \
|
||||||
|
ggml/src/ggml-blas.cpp \
|
||||||
|
ggml/include/ggml-blas.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
# Rules for building object files
|
ifdef GGML_LLAMAFILE
|
||||||
ggml/%.o: ggml/%.c
|
ggml/src/sgemm.o: \
|
||||||
$(CC) $(CFLAGS) -MMD -c $< -o $@
|
ggml/src/sgemm.cpp \
|
||||||
|
ggml/src/sgemm.h \
|
||||||
|
ggml/include/ggml.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
endif # GGML_LLAMAFILE
|
||||||
|
|
||||||
ggml/%.o: ggml/%.cpp
|
ifdef GGML_RPC
|
||||||
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
|
ggml/src/ggml-rpc.o: \
|
||||||
|
ggml/src/ggml-rpc.cpp \
|
||||||
|
ggml/include/ggml-rpc.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
endif # GGML_RPC
|
||||||
|
|
||||||
src/%.o: src/%.cpp
|
$(LIB_GGML): \
|
||||||
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
|
$(OBJ_GGML)
|
||||||
|
|
||||||
examples/%.o: examples/%.cpp
|
|
||||||
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
|
|
||||||
|
|
||||||
# Rules for building libraries
|
|
||||||
$(LIB_GGML): $(OBJ_GGML)
|
|
||||||
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
$(LIB_GGML_S): $(OBJ_GGML)
|
$(LIB_GGML_S): \
|
||||||
|
$(OBJ_GGML)
|
||||||
ar rcs $(LIB_GGML_S) $^
|
ar rcs $(LIB_GGML_S) $^
|
||||||
|
|
||||||
$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
|
# whisper
|
||||||
|
|
||||||
|
src/whisper.o: \
|
||||||
|
src/whisper.cpp \
|
||||||
|
src/whisper-mel.hpp \
|
||||||
|
include/whisper.h \
|
||||||
|
ggml/include/ggml.h \
|
||||||
|
ggml/include/ggml-alloc.h \
|
||||||
|
ggml/include/ggml-backend.h \
|
||||||
|
ggml/include/ggml-cuda.h \
|
||||||
|
ggml/include/ggml-metal.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
$(LIB_WHISPER): \
|
||||||
|
$(OBJ_WHISPER) \
|
||||||
|
$(LIB_GGML)
|
||||||
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
$(LIB_LLAMA_S): $(OBJ_LLAMA)
|
$(LIB_WHISPER_S): \
|
||||||
ar rcs $(LIB_LLAMA_S) $^
|
$(OBJ_WHISPER)
|
||||||
|
ar rcs $(LIB_WHISPER_S) $^
|
||||||
|
|
||||||
|
# common
|
||||||
|
|
||||||
|
examples/common.o: \
|
||||||
|
examples/common.cpp \
|
||||||
|
examples/common.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
examples/common-ggml.o: \
|
||||||
|
examples/common-ggml.cpp \
|
||||||
|
examples/common-ggml.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
$(LIB_COMMON): \
|
||||||
|
$(OBJ_COMMON)
|
||||||
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
|
$(LIB_COMMON_S): \
|
||||||
|
$(OBJ_COMMON)
|
||||||
|
ar rcs $(LIB_COMMON_S) $^
|
||||||
|
|
||||||
# common-sdl
|
# common-sdl
|
||||||
|
|
||||||
@ -987,21 +991,34 @@ examples/common-sdl.o: \
|
|||||||
examples/common-sdl.h
|
examples/common-sdl.h
|
||||||
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
|
||||||
|
|
||||||
$(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML)
|
$(LIB_COMMON_SDL): \
|
||||||
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
$(OBJ_SDL)
|
||||||
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) $(LDFLAGS_SDL)
|
||||||
|
|
||||||
$(LIB_COMMON_S): $(OBJ_COMMON)
|
$(LIB_COMMON_SDL_S): \
|
||||||
ar rcs $(LIB_COMMON_S) $^
|
$(OBJ_SDL)
|
||||||
|
ar rcs $(LIB_COMMON_SDL_S) $^
|
||||||
|
|
||||||
# Include dependency files
|
|
||||||
-include $(DEP_FILES)
|
|
||||||
|
|
||||||
# Clean rule
|
|
||||||
clean:
|
clean:
|
||||||
rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS)
|
rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||||
rm -rvf *.a *.dll *.so *.dot
|
rm -rvf src/*.o
|
||||||
find ggml src tests examples -type f -name "*.o" -delete
|
rm -rvf src/coreml/*.o
|
||||||
find ggml src tests examples -type f -name "*.d" -delete
|
rm -rvf tests/*.o
|
||||||
|
rm -rvf examples/*.o
|
||||||
|
rm -rvf *.a
|
||||||
|
rm -rvf *.dll
|
||||||
|
rm -rvf *.so
|
||||||
|
rm -rvf *.dot
|
||||||
|
rm -rvf ggml/*.a
|
||||||
|
rm -rvf ggml/*.dll
|
||||||
|
rm -rvf ggml/*.so
|
||||||
|
rm -vrf ggml/src/*.o
|
||||||
|
rm -vrf ggml/src/ggml-metal-embed.metal
|
||||||
|
rm -vrf ggml/src/ggml-cuda/*.o
|
||||||
|
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
|
||||||
|
rm -rvf $(BUILD_TARGETS)
|
||||||
|
rm -rvf $(TEST_TARGETS)
|
||||||
|
find examples -type f -name "*.o" -delete
|
||||||
|
|
||||||
#
|
#
|
||||||
# Examples
|
# Examples
|
||||||
@ -1018,6 +1035,9 @@ main: examples/main/main.cpp \
|
|||||||
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
|
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||||
|
@echo
|
||||||
|
@echo '==== Run ./llama-cli -h for help. ===='
|
||||||
|
@echo
|
||||||
|
|
||||||
bench: examples/bench/bench.cpp \
|
bench: examples/bench/bench.cpp \
|
||||||
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
|
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
|
||||||
@ -1049,14 +1069,12 @@ lsp: examples/lsp/lsp.cpp \
|
|||||||
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
|
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
|
||||||
|
|
||||||
# TODO: disabled until update
|
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
|
||||||
# https://github.com/ggerganov/whisper.cpp/issues/1818
|
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
|
||||||
#talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
|
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||||
# $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
|
||||||
# $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
||||||
# $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
|
|
||||||
|
|
||||||
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/llama-vocab.cpp examples/talk-llama/llama-grammar.cpp examples/talk-llama/llama-sampling.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
|
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
|
||||||
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
|
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
|
||||||
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
|
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
|
||||||
@ -1070,6 +1088,11 @@ tests: $(TEST_TARGETS)
|
|||||||
tests/test-c.o: tests/test-c.c include/whisper.h
|
tests/test-c.o: tests/test-c.c include/whisper.h
|
||||||
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
|
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
|
||||||
|
|
||||||
|
tests/test-backend-ops: tests/test-backend-ops.cpp \
|
||||||
|
$(OBJ_GGML)
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||||
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Audio samples
|
# Audio samples
|
||||||
#
|
#
|
||||||
@ -1115,9 +1138,8 @@ samples:
|
|||||||
.PHONY: large-v1
|
.PHONY: large-v1
|
||||||
.PHONY: large-v2
|
.PHONY: large-v2
|
||||||
.PHONY: large-v3
|
.PHONY: large-v3
|
||||||
.PHONY: large-v3-turbo
|
|
||||||
|
|
||||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo: main
|
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
|
||||||
bash ./models/download-ggml-model.sh $@
|
bash ./models/download-ggml-model.sh $@
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "==============================================="
|
@echo "==============================================="
|
||||||
|
@ -18,40 +18,31 @@ let package = Package(
|
|||||||
name: "whisper",
|
name: "whisper",
|
||||||
path: ".",
|
path: ".",
|
||||||
exclude: [
|
exclude: [
|
||||||
"build",
|
|
||||||
"bindings",
|
"bindings",
|
||||||
"cmake",
|
"cmake",
|
||||||
|
"coreml",
|
||||||
"examples",
|
"examples",
|
||||||
"scripts",
|
"extra",
|
||||||
"models",
|
"models",
|
||||||
"samples",
|
"samples",
|
||||||
"tests",
|
"tests",
|
||||||
"CMakeLists.txt",
|
"CMakeLists.txt",
|
||||||
"Makefile",
|
"Makefile"
|
||||||
"ggml/src/ggml-metal/ggml-metal-embed.metal"
|
|
||||||
],
|
],
|
||||||
sources: [
|
sources: [
|
||||||
"ggml/src/ggml.c",
|
"ggml/src/ggml.c",
|
||||||
"src/whisper.cpp",
|
"src/whisper.cpp",
|
||||||
"ggml/src/ggml-aarch64.c",
|
|
||||||
"ggml/src/ggml-alloc.c",
|
"ggml/src/ggml-alloc.c",
|
||||||
"ggml/src/ggml-backend.cpp",
|
"ggml/src/ggml-backend.c",
|
||||||
"ggml/src/ggml-backend-reg.cpp",
|
|
||||||
"ggml/src/ggml-cpu/ggml-cpu.c",
|
|
||||||
"ggml/src/ggml-cpu/ggml-cpu.cpp",
|
|
||||||
"ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
|
|
||||||
"ggml/src/ggml-cpu/ggml-cpu-quants.c",
|
|
||||||
"ggml/src/ggml-quants.c",
|
"ggml/src/ggml-quants.c",
|
||||||
"ggml/src/ggml-threading.cpp",
|
"ggml/src/ggml-metal.m"
|
||||||
"ggml/src/ggml-metal/ggml-metal.m"
|
|
||||||
],
|
],
|
||||||
resources: [.process("ggml/src/ggml-metal/ggml-metal.metal")],
|
resources: [.process("ggml-metal.metal")],
|
||||||
publicHeadersPath: "spm-headers",
|
publicHeadersPath: "spm-headers",
|
||||||
cSettings: [
|
cSettings: [
|
||||||
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
||||||
.unsafeFlags(["-fno-objc-arc"]),
|
|
||||||
.headerSearchPath("ggml/src"),
|
|
||||||
.define("GGML_USE_ACCELERATE"),
|
.define("GGML_USE_ACCELERATE"),
|
||||||
|
.unsafeFlags(["-fno-objc-arc"]),
|
||||||
.define("GGML_USE_METAL")
|
.define("GGML_USE_METAL")
|
||||||
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
||||||
// We should consider add this in the future when we drop support for iOS 14
|
// We should consider add this in the future when we drop support for iOS 14
|
||||||
|
111
README.md
111
README.md
@ -7,23 +7,21 @@
|
|||||||
[](https://conan.io/center/whisper-cpp)
|
[](https://conan.io/center/whisper-cpp)
|
||||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||||
|
|
||||||
Stable: [v1.7.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
Stable: [v1.6.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.6.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||||
|
|
||||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||||
|
|
||||||
- Plain C/C++ implementation without dependencies
|
- Plain C/C++ implementation without dependencies
|
||||||
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](https://github.com/ggerganov/whisper.cpp#core-ml-support)
|
||||||
- AVX intrinsics support for x86 architectures
|
- AVX intrinsics support for x86 architectures
|
||||||
- VSX intrinsics support for POWER architectures
|
- VSX intrinsics support for POWER architectures
|
||||||
- Mixed F16 / F32 precision
|
- Mixed F16 / F32 precision
|
||||||
- [Integer quantization support](#quantization)
|
- [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
|
||||||
- Zero memory allocations at runtime
|
- Zero memory allocations at runtime
|
||||||
- [Vulkan support](#vulkan-gpu-support)
|
|
||||||
- Support for CPU-only inference
|
- Support for CPU-only inference
|
||||||
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
|
- [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
|
||||||
- [OpenVINO Support](#openvino-support)
|
- [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
|
||||||
- [Ascend NPU Support](#ascend-npu-support)
|
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
|
||||||
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
|
|
||||||
|
|
||||||
Supported platforms:
|
Supported platforms:
|
||||||
|
|
||||||
@ -35,9 +33,9 @@ Supported platforms:
|
|||||||
- [x] [WebAssembly](examples/whisper.wasm)
|
- [x] [WebAssembly](examples/whisper.wasm)
|
||||||
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
|
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
|
||||||
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
||||||
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
|
- [x] [docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
|
||||||
|
|
||||||
The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
|
The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
|
||||||
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
|
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
|
||||||
|
|
||||||
Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
|
Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
|
||||||
@ -57,8 +55,8 @@ Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
|
|||||||
|
|
||||||
## Implementation details
|
## Implementation details
|
||||||
|
|
||||||
- The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
|
- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
|
||||||
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
|
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
|
||||||
- Sample usage is demonstrated in [main.cpp](examples/main)
|
- Sample usage is demonstrated in [main.cpp](examples/main)
|
||||||
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
|
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
|
||||||
- Various other examples are available in the [examples](examples) folder
|
- Various other examples are available in the [examples](examples) folder
|
||||||
@ -73,23 +71,17 @@ First clone the repository:
|
|||||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
```
|
```
|
||||||
|
|
||||||
Navigate into the directory:
|
|
||||||
|
|
||||||
```
|
|
||||||
cd whisper.cpp
|
|
||||||
```
|
|
||||||
|
|
||||||
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
|
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sh ./models/download-ggml-model.sh base.en
|
bash ./models/download-ggml-model.sh base.en
|
||||||
```
|
```
|
||||||
|
|
||||||
Now build the [main](examples/main) example and transcribe an audio file like this:
|
Now build the [main](examples/main) example and transcribe an audio file like this:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# build the main example
|
# build the main example
|
||||||
make -j
|
make
|
||||||
|
|
||||||
# transcribe an audio file
|
# transcribe an audio file
|
||||||
./main -f samples/jfk.wav
|
./main -f samples/jfk.wav
|
||||||
@ -100,7 +92,7 @@ make -j
|
|||||||
For a quick demo, simply run `make base.en`:
|
For a quick demo, simply run `make base.en`:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
$ make -j base.en
|
$ make base.en
|
||||||
|
|
||||||
cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
|
cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
|
||||||
c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
|
c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
|
||||||
@ -153,7 +145,7 @@ options:
|
|||||||
-ng, --no-gpu [false ] disable GPU
|
-ng, --no-gpu [false ] disable GPU
|
||||||
|
|
||||||
|
|
||||||
sh ./models/download-ggml-model.sh base.en
|
bash ./models/download-ggml-model.sh base.en
|
||||||
Downloading ggml model base.en ...
|
Downloading ggml model base.en ...
|
||||||
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
|
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
|
||||||
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
|
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
|
||||||
@ -224,7 +216,7 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
|||||||
If you want some extra audio samples to play with, simply run:
|
If you want some extra audio samples to play with, simply run:
|
||||||
|
|
||||||
```
|
```
|
||||||
make -j samples
|
make samples
|
||||||
```
|
```
|
||||||
|
|
||||||
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
||||||
@ -232,18 +224,17 @@ This will download a few more audio files from Wikipedia and convert them to 16-
|
|||||||
You can download and run the other models as follows:
|
You can download and run the other models as follows:
|
||||||
|
|
||||||
```
|
```
|
||||||
make -j tiny.en
|
make tiny.en
|
||||||
make -j tiny
|
make tiny
|
||||||
make -j base.en
|
make base.en
|
||||||
make -j base
|
make base
|
||||||
make -j small.en
|
make small.en
|
||||||
make -j small
|
make small
|
||||||
make -j medium.en
|
make medium.en
|
||||||
make -j medium
|
make medium
|
||||||
make -j large-v1
|
make large-v1
|
||||||
make -j large-v2
|
make large-v2
|
||||||
make -j large-v3
|
make large-v3
|
||||||
make -j large-v3-turbo
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Memory usage
|
## Memory usage
|
||||||
@ -265,7 +256,7 @@ Here are the steps for creating and using a quantized model:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# quantize a model with Q5_0 method
|
# quantize a model with Q5_0 method
|
||||||
make -j quantize
|
make quantize
|
||||||
./quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
|
./quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
|
||||||
|
|
||||||
# run the examples as usual, specifying the quantized model file
|
# run the examples as usual, specifying the quantized model file
|
||||||
@ -430,16 +421,6 @@ make clean
|
|||||||
GGML_CUDA=1 make -j
|
GGML_CUDA=1 make -j
|
||||||
```
|
```
|
||||||
|
|
||||||
## Vulkan GPU support
|
|
||||||
Cross-vendor solution which allows you to accelerate workload on your GPU.
|
|
||||||
First, make sure your graphics card driver provides support for Vulkan API.
|
|
||||||
|
|
||||||
Now build `whisper.cpp` with Vulkan support:
|
|
||||||
```
|
|
||||||
make clean
|
|
||||||
make GGML_VULKAN=1 -j
|
|
||||||
```
|
|
||||||
|
|
||||||
## BLAS CPU support via OpenBLAS
|
## BLAS CPU support via OpenBLAS
|
||||||
|
|
||||||
Encoder processing can be accelerated on the CPU via OpenBLAS.
|
Encoder processing can be accelerated on the CPU via OpenBLAS.
|
||||||
@ -467,39 +448,6 @@ cmake -DWHISPER_MKL=ON ..
|
|||||||
WHISPER_MKL=1 make -j
|
WHISPER_MKL=1 make -j
|
||||||
```
|
```
|
||||||
|
|
||||||
## Ascend NPU support
|
|
||||||
|
|
||||||
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
|
|
||||||
|
|
||||||
First, check if your Ascend NPU device is supported:
|
|
||||||
|
|
||||||
**Verified devices**
|
|
||||||
| Ascend NPU | Status |
|
|
||||||
|:-----------------------------:|:-------:|
|
|
||||||
| Atlas 300T A2 | Support |
|
|
||||||
|
|
||||||
Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community) . The lasted version of CANN is recommanded.
|
|
||||||
|
|
||||||
Now build `whisper.cpp` with CANN support:
|
|
||||||
|
|
||||||
```
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake .. -D GGML_CANN=on
|
|
||||||
make -j
|
|
||||||
```
|
|
||||||
|
|
||||||
Run the inference examples as usual, for example:
|
|
||||||
|
|
||||||
```
|
|
||||||
./build/bin/main -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
|
|
||||||
```
|
|
||||||
|
|
||||||
*Notes:*
|
|
||||||
|
|
||||||
- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
|
|
||||||
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
|
|
||||||
|
|
||||||
## Docker
|
## Docker
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
@ -636,7 +584,7 @@ The [stream](examples/stream) tool samples the audio every half a second and run
|
|||||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make stream -j
|
make stream
|
||||||
./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -803,7 +751,7 @@ took to execute it. The results are summarized in the following Github issue:
|
|||||||
|
|
||||||
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
||||||
|
|
||||||
Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
|
Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](bench.py).
|
||||||
|
|
||||||
You can run it with the following command, by default it will run against any standard model in the models folder.
|
You can run it with the following command, by default it will run against any standard model in the models folder.
|
||||||
|
|
||||||
@ -850,7 +798,6 @@ For more details, see the conversion script [models/convert-pt-to-ggml.py](model
|
|||||||
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
||||||
- [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
|
- [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
|
||||||
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
||||||
- [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
|
|
||||||
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
||||||
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
|
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
|
||||||
|
|
||||||
|
@ -14,14 +14,9 @@ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
|||||||
BUILD_DIR := build
|
BUILD_DIR := build
|
||||||
MODELS_DIR := models
|
MODELS_DIR := models
|
||||||
EXAMPLES_DIR := $(wildcard examples/*)
|
EXAMPLES_DIR := $(wildcard examples/*)
|
||||||
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
INCLUDE_PATH := $(abspath ../..)
|
||||||
LIBRARY_PATH := $(abspath ../..)
|
LIBRARY_PATH := $(abspath ../..)
|
||||||
|
|
||||||
ifeq ($(GGML_CUDA),1)
|
|
||||||
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
|
|
||||||
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
||||||
endif
|
endif
|
||||||
|
@ -62,12 +62,6 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
|
|||||||
make examples
|
make examples
|
||||||
```
|
```
|
||||||
|
|
||||||
To build using cuda support add `GGML_CUDA=1`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
GGML_CUDA=1 make examples
|
|
||||||
```
|
|
||||||
|
|
||||||
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -24,7 +24,7 @@ const (
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
// The models which will be downloaded, if no model is specified as an argument
|
// The models which will be downloaded, if no model is specified as an argument
|
||||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
|
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
module github.com/ggerganov/whisper.cpp/bindings/go
|
module github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
|
|
||||||
go 1.23
|
go 1.19
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/go-audio/wav v1.1.0
|
github.com/go-audio/wav v1.1.0
|
||||||
github.com/stretchr/testify v1.9.0
|
github.com/stretchr/testify v1.8.1
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||||
@ -8,9 +9,15 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
|||||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
|
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||||
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
@ -119,28 +119,6 @@ func (p *Params) SetAudioCtx(n int) {
|
|||||||
p.audio_ctx = C.int(n)
|
p.audio_ctx = C.int(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Params) SetMaxContext(n int) {
|
|
||||||
p.n_max_text_ctx = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetBeamSize(n int) {
|
|
||||||
p.beam_search.beam_size = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetEntropyThold(t float32) {
|
|
||||||
p.entropy_thold = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetTemperature(t float32) {
|
|
||||||
p.temperature = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sets the fallback temperature incrementation
|
|
||||||
// Pass -1.0 to disable this feature
|
|
||||||
func (p *Params) SetTemperatureFallback(t float32) {
|
|
||||||
p.temperature_inc = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set initial prompt
|
// Set initial prompt
|
||||||
func (p *Params) SetInitialPrompt(prompt string) {
|
func (p *Params) SetInitialPrompt(prompt string) {
|
||||||
p.initial_prompt = C.CString(prompt)
|
p.initial_prompt = C.CString(prompt)
|
||||||
@ -171,10 +149,6 @@ func (p *Params) String() string {
|
|||||||
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
||||||
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
||||||
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
||||||
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
|
||||||
str += fmt.Sprintf(" temperature=%f", p.temperature)
|
|
||||||
str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
|
|
||||||
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
|
||||||
if p.translate {
|
if p.translate {
|
||||||
str += " translate"
|
str += " translate"
|
||||||
}
|
}
|
||||||
|
@ -125,32 +125,6 @@ func (context *context) SetAudioCtx(n uint) {
|
|||||||
context.params.SetAudioCtx(int(n))
|
context.params.SetAudioCtx(int(n))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set maximum number of text context tokens to store
|
|
||||||
func (context *context) SetMaxContext(n int) {
|
|
||||||
context.params.SetMaxContext(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Beam Size
|
|
||||||
func (context *context) SetBeamSize(n int) {
|
|
||||||
context.params.SetBeamSize(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Entropy threshold
|
|
||||||
func (context *context) SetEntropyThold(t float32) {
|
|
||||||
context.params.SetEntropyThold(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Temperature
|
|
||||||
func (context *context) SetTemperature(t float32) {
|
|
||||||
context.params.SetTemperature(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the fallback temperature incrementation
|
|
||||||
// Pass -1.0 to disable this feature
|
|
||||||
func (context *context) SetTemperatureFallback(t float32) {
|
|
||||||
context.params.SetTemperatureFallback(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set initial prompt
|
// Set initial prompt
|
||||||
func (context *context) SetInitialPrompt(prompt string) {
|
func (context *context) SetInitialPrompt(prompt string) {
|
||||||
context.params.SetInitialPrompt(prompt)
|
context.params.SetInitialPrompt(prompt)
|
||||||
|
@ -4,90 +4,52 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
// Packages
|
||||||
"github.com/go-audio/wav"
|
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||||
assert "github.com/stretchr/testify/assert"
|
assert "github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestSetLanguage(t *testing.T) {
|
const (
|
||||||
assert := assert.New(t)
|
ModelPath = "../../models/ggml-tiny.bin"
|
||||||
|
SamplePath = "../../samples/jfk.wav"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_Whisper_000(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, model not found:", ModelPath)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load model
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
assert.NoError(model.Close())
|
||||||
|
|
||||||
|
t.Log("languages=", model.Languages())
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_Whisper_001(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, model not found:", ModelPath)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load model
|
||||||
model, err := whisper.New(ModelPath)
|
model, err := whisper.New(ModelPath)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
assert.NotNil(model)
|
assert.NotNil(model)
|
||||||
defer model.Close()
|
defer model.Close()
|
||||||
|
|
||||||
context, err := model.NewContext()
|
// Get context for decoding
|
||||||
|
ctx, err := model.NewContext()
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
assert.NotNil(ctx)
|
||||||
|
|
||||||
// This returns an error since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
err = context.SetLanguage("en")
|
|
||||||
assert.Error(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestContextModelIsMultilingual(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
isMultilingual := context.IsMultilingual()
|
|
||||||
|
|
||||||
// This returns false since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
assert.False(isMultilingual)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLanguage(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
// This always returns en since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
expectedLanguage := "en"
|
|
||||||
actualLanguage := context.Language()
|
|
||||||
assert.Equal(expectedLanguage, actualLanguage)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProcess(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
fh, err := os.Open(SamplePath)
|
|
||||||
assert.NoError(err)
|
|
||||||
defer fh.Close()
|
|
||||||
|
|
||||||
// Decode the WAV file - load the full buffer
|
|
||||||
dec := wav.NewDecoder(fh)
|
|
||||||
buf, err := dec.FullPCMBuffer()
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.Equal(uint16(1), dec.NumChans)
|
|
||||||
|
|
||||||
data := buf.AsFloat32Buffer().Data
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
err = context.Process(data, nil, nil)
|
|
||||||
assert.NoError(err)
|
|
||||||
}
|
}
|
||||||
|
@ -48,12 +48,7 @@ type Context interface {
|
|||||||
SetTokenTimestamps(bool) // Set token timestamps flag
|
SetTokenTimestamps(bool) // Set token timestamps flag
|
||||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
||||||
SetAudioCtx(uint) // Set audio encoder context
|
SetAudioCtx(uint) // Set audio encoder context
|
||||||
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
|
||||||
SetBeamSize(n int) // Set Beam Size
|
|
||||||
SetEntropyThold(t float32) // Set Entropy threshold
|
|
||||||
SetInitialPrompt(prompt string) // Set initial prompt
|
SetInitialPrompt(prompt string) // Set initial prompt
|
||||||
SetTemperature(t float32) // Set temperature
|
|
||||||
SetTemperatureFallback(t float32) // Set temperature incrementation
|
|
||||||
|
|
||||||
// Process mono audio data and return any errors.
|
// Process mono audio data and return any errors.
|
||||||
// If defined, newly generated segments are passed to the
|
// If defined, newly generated segments are passed to the
|
||||||
|
@ -1,91 +0,0 @@
|
|||||||
package whisper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
||||||
assert "github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNew(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
t.Run("valid model path", func(t *testing.T) {
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("invalid model path", func(t *testing.T) {
|
|
||||||
invalidModelPath := "invalid-model-path.bin"
|
|
||||||
model, err := whisper.New(invalidModelPath)
|
|
||||||
assert.Error(err)
|
|
||||||
assert.Nil(model)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestClose(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
|
|
||||||
err = model.Close()
|
|
||||||
assert.NoError(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewContext(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(context)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsMultilingual(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
isMultilingual := model.IsMultilingual()
|
|
||||||
|
|
||||||
// This returns false since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
assert.False(isMultilingual)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLanguages(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
expectedLanguages := []string{
|
|
||||||
"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
|
|
||||||
"ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
|
|
||||||
"el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
|
|
||||||
"bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
|
|
||||||
"sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
|
|
||||||
"mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
|
|
||||||
"yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
|
|
||||||
"lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
|
|
||||||
"bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
|
|
||||||
}
|
|
||||||
|
|
||||||
actualLanguages := model.Languages()
|
|
||||||
|
|
||||||
assert.Equal(expectedLanguages, actualLanguages)
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
package whisper_test
|
|
||||||
|
|
||||||
const (
|
|
||||||
ModelPath = "../../models/ggml-small.en.bin"
|
|
||||||
SamplePath = "../../samples/jfk.wav"
|
|
||||||
)
|
|
@ -9,7 +9,7 @@ import (
|
|||||||
// CGO
|
// CGO
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
|
#cgo LDFLAGS: -lwhisper -lm -lstdc++
|
||||||
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
||||||
#include <whisper.h>
|
#include <whisper.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "whisper.cpp",
|
"name": "whisper.cpp",
|
||||||
"version": "1.7.2",
|
"version": "1.6.2",
|
||||||
"description": "Whisper speech recognition",
|
"description": "Whisper speech recognition",
|
||||||
"main": "whisper.js",
|
"main": "whisper.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
3
bindings/ruby/.gitignore
vendored
3
bindings/ruby/.gitignore
vendored
@ -1,3 +0,0 @@
|
|||||||
LICENSE
|
|
||||||
pkg/
|
|
||||||
lib/whisper.*
|
|
@ -1,169 +0,0 @@
|
|||||||
whispercpp
|
|
||||||
==========
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
Ruby bindings for [whisper.cpp][], an interface of automatic speech recognition model.
|
|
||||||
|
|
||||||
Installation
|
|
||||||
------------
|
|
||||||
|
|
||||||
Install the gem and add to the application's Gemfile by executing:
|
|
||||||
|
|
||||||
$ bundle add whispercpp
|
|
||||||
|
|
||||||
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
||||||
|
|
||||||
$ gem install whispercpp
|
|
||||||
|
|
||||||
Usage
|
|
||||||
-----
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
require "whisper"
|
|
||||||
|
|
||||||
whisper = Whisper::Context.new("path/to/model.bin")
|
|
||||||
|
|
||||||
params = Whisper::Params.new
|
|
||||||
params.language = "en"
|
|
||||||
params.offset = 10_000
|
|
||||||
params.duration = 60_000
|
|
||||||
params.max_text_tokens = 300
|
|
||||||
params.translate = true
|
|
||||||
params.print_timestamps = false
|
|
||||||
params.initial_prompt = "Initial prompt here."
|
|
||||||
|
|
||||||
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
|
||||||
puts whole_text
|
|
||||||
end
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
### Preparing model ###
|
|
||||||
|
|
||||||
Use script to download model file(s):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
|
||||||
cd whisper.cpp
|
|
||||||
sh ./models/download-ggml-model.sh base.en
|
|
||||||
```
|
|
||||||
|
|
||||||
There are some types of models. See [models][] page for details.
|
|
||||||
|
|
||||||
### Preparing audio file ###
|
|
||||||
|
|
||||||
Currently, whisper.cpp accepts only 16-bit WAV files.
|
|
||||||
|
|
||||||
### API ###
|
|
||||||
|
|
||||||
Once `Whisper::Context#transcribe` called, you can retrieve segments by `#each_segment`:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
def format_time(time_ms)
|
|
||||||
sec, decimal_part = time_ms.divmod(1000)
|
|
||||||
min, sec = sec.divmod(60)
|
|
||||||
hour, min = min.divmod(60)
|
|
||||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
|
||||||
end
|
|
||||||
|
|
||||||
whisper.transcribe("path/to/audio.wav", params)
|
|
||||||
|
|
||||||
whisper.each_segment.with_index do |segment, index|
|
|
||||||
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
|
||||||
nth: index + 1,
|
|
||||||
st: format_time(segment.start_time),
|
|
||||||
ed: format_time(segment.end_time),
|
|
||||||
text: segment.text
|
|
||||||
}
|
|
||||||
line << " (speaker turned)" if segment.speaker_next_turn?
|
|
||||||
puts line
|
|
||||||
end
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
You can also add hook to params called on new segment:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
def format_time(time_ms)
|
|
||||||
sec, decimal_part = time_ms.divmod(1000)
|
|
||||||
min, sec = sec.divmod(60)
|
|
||||||
hour, min = min.divmod(60)
|
|
||||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
|
||||||
end
|
|
||||||
|
|
||||||
# Add hook before calling #transcribe
|
|
||||||
params.on_new_segment do |segment|
|
|
||||||
line = "[%{st} --> %{ed}] %{text}" % {
|
|
||||||
st: format_time(segment.start_time),
|
|
||||||
ed: format_time(segment.end_time),
|
|
||||||
text: segment.text
|
|
||||||
}
|
|
||||||
line << " (speaker turned)" if segment.speaker_next_turn?
|
|
||||||
puts line
|
|
||||||
end
|
|
||||||
|
|
||||||
whisper.transcribe("path/to/audio.wav", params)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
You can see model information:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
whisper = Whisper::Context.new("path/to/model.bin")
|
|
||||||
model = whisper.model
|
|
||||||
|
|
||||||
model.n_vocab # => 51864
|
|
||||||
model.n_audio_ctx # => 1500
|
|
||||||
model.n_audio_state # => 512
|
|
||||||
model.n_audio_head # => 8
|
|
||||||
model.n_audio_layer # => 6
|
|
||||||
model.n_text_ctx # => 448
|
|
||||||
model.n_text_state # => 512
|
|
||||||
model.n_text_head # => 8
|
|
||||||
model.n_text_layer # => 6
|
|
||||||
model.n_mels # => 80
|
|
||||||
model.ftype # => 1
|
|
||||||
model.type # => "base"
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
You can set log callback:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
prefix = "[MyApp] "
|
|
||||||
log_callback = ->(level, buffer, user_data) {
|
|
||||||
case level
|
|
||||||
when Whisper::LOG_LEVEL_NONE
|
|
||||||
puts "#{user_data}none: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_INFO
|
|
||||||
puts "#{user_data}info: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_WARN
|
|
||||||
puts "#{user_data}warn: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_ERROR
|
|
||||||
puts "#{user_data}error: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_DEBUG
|
|
||||||
puts "#{user_data}debug: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_CONT
|
|
||||||
puts "#{user_data}same to previous: #{buffer}"
|
|
||||||
end
|
|
||||||
}
|
|
||||||
Whisper.log_set log_callback, prefix
|
|
||||||
```
|
|
||||||
|
|
||||||
Using this feature, you are also able to suppress log:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
Whisper.log_set ->(level, buffer, user_data) {
|
|
||||||
# do nothing
|
|
||||||
}, nil
|
|
||||||
Whisper::Context.new(MODEL)
|
|
||||||
```
|
|
||||||
|
|
||||||
License
|
|
||||||
-------
|
|
||||||
|
|
||||||
The same to [whisper.cpp][].
|
|
||||||
|
|
||||||
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
|
|
||||||
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
|
|
@ -1,68 +1,12 @@
|
|||||||
require 'rake/clean'
|
require 'rake/clean'
|
||||||
require "bundler/gem_tasks"
|
require 'rubygems/package'
|
||||||
require "pathname"
|
|
||||||
require "yaml"
|
|
||||||
require "rake/testtask"
|
|
||||||
|
|
||||||
extsources = YAML.load_file("extsources.yaml")
|
desc 'Build gem'
|
||||||
SOURCES = FileList[]
|
task :package do
|
||||||
extsources.each do |src|
|
spec_source = File.read File.join(File.dirname(__FILE__),'whispercpp.gemspec')
|
||||||
basename = src.pathmap("%f")
|
spec = nil
|
||||||
dest = basename == "LICENSE" ? basename : basename.pathmap("ext/%f")
|
# see: http://gist.github.com/16215
|
||||||
file src
|
Thread.new { spec = eval("#{spec_source}") }.join
|
||||||
file dest => src do |t|
|
spec.validate
|
||||||
cp t.source, t.name
|
Gem::Package.build(spec)
|
||||||
end
|
|
||||||
SOURCES.include dest
|
|
||||||
end
|
|
||||||
CLEAN.include SOURCES
|
|
||||||
CLEAN.include FileList[
|
|
||||||
"ext/*.o",
|
|
||||||
"ext/*.metal",
|
|
||||||
"ext/whisper.{so,bundle,dll}",
|
|
||||||
"ext/depend"
|
|
||||||
]
|
|
||||||
|
|
||||||
task build: FileList[
|
|
||||||
"ext/Makefile",
|
|
||||||
"ext/ruby_whisper.h",
|
|
||||||
"ext/ruby_whisper.cpp",
|
|
||||||
"whispercpp.gemspec",
|
|
||||||
]
|
|
||||||
|
|
||||||
directory "pkg"
|
|
||||||
CLOBBER.include "pkg"
|
|
||||||
|
|
||||||
TEST_MODEL = "../../models/ggml-base.en.bin"
|
|
||||||
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
|
||||||
SO_FILE = File.join("ext", LIB_NAME)
|
|
||||||
LIB_FILE = File.join("lib", LIB_NAME)
|
|
||||||
|
|
||||||
file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
|
|
||||||
Dir.chdir "ext" do
|
|
||||||
ruby "extconf.rb"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
file SO_FILE => "ext/Makefile" do |t|
|
|
||||||
Dir.chdir "ext" do
|
|
||||||
sh "make"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
CLEAN.include LIB_FILE
|
|
||||||
|
|
||||||
directory "lib"
|
|
||||||
file LIB_FILE => [SO_FILE, "lib"] do |t|
|
|
||||||
copy t.source, t.name
|
|
||||||
end
|
|
||||||
|
|
||||||
Rake::TestTask.new do |t|
|
|
||||||
t.test_files = FileList["tests/test_*.rb"]
|
|
||||||
end
|
|
||||||
task test: [TEST_MODEL, LIB_FILE]
|
|
||||||
|
|
||||||
file TEST_MODEL do
|
|
||||||
Dir.chdir "../.." do
|
|
||||||
sh "./models/download-ggml-model.sh base.en"
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
28
bindings/ruby/ext/.gitignore
vendored
28
bindings/ruby/ext/.gitignore
vendored
@ -3,33 +3,7 @@ ggml.c
|
|||||||
ggml.h
|
ggml.h
|
||||||
ggml-alloc.c
|
ggml-alloc.c
|
||||||
ggml-alloc.h
|
ggml-alloc.h
|
||||||
ggml-aarch64.c
|
whisper.bundle
|
||||||
ggml-aarch64.h
|
|
||||||
ggml-backend.cpp
|
|
||||||
ggml-backend-impl.h
|
|
||||||
ggml-backend.c
|
|
||||||
ggml-backend.h
|
|
||||||
ggml-common.h
|
|
||||||
ggml-cpu-impl.h
|
|
||||||
ggml-metal.m
|
|
||||||
ggml-metal.metal
|
|
||||||
ggml-metal-embed.metal
|
|
||||||
ggml-blas.cpp
|
|
||||||
ggml-cuda.h
|
|
||||||
ggml-impl.h
|
|
||||||
ggml-kompute.h
|
|
||||||
ggml-metal.h
|
|
||||||
ggml-opencl.h
|
|
||||||
ggml-quants.c
|
|
||||||
ggml-quants.h
|
|
||||||
ggml-sycl.h
|
|
||||||
ggml-vulkan.h
|
|
||||||
ggml-blas.h
|
|
||||||
get-flags.mk
|
|
||||||
whisper.cpp
|
whisper.cpp
|
||||||
whisper.h
|
whisper.h
|
||||||
dr_wav.h
|
dr_wav.h
|
||||||
depend
|
|
||||||
whisper.bundle
|
|
||||||
whisper.so
|
|
||||||
whisper.dll
|
|
||||||
|
@ -1,10 +1,23 @@
|
|||||||
require 'mkmf'
|
require 'mkmf'
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper-mel.hpp')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.c')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-common.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
|
||||||
|
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
|
||||||
|
|
||||||
|
|
||||||
# need to use c++ compiler flags
|
# need to use c++ compiler flags
|
||||||
$CXXFLAGS << ' -std=c++11'
|
$CXXFLAGS << ' -std=c++11'
|
||||||
|
|
||||||
$LDFLAGS << ' -lstdc++'
|
|
||||||
|
|
||||||
# Set to true when building binary gems
|
# Set to true when building binary gems
|
||||||
if enable_config('static-stdlib', false)
|
if enable_config('static-stdlib', false)
|
||||||
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
||||||
@ -15,180 +28,4 @@ if enable_config('march-tune-native', false)
|
|||||||
$CXXFLAGS << ' -march=native -mtune=native'
|
$CXXFLAGS << ' -march=native -mtune=native'
|
||||||
end
|
end
|
||||||
|
|
||||||
if ENV['WHISPER_METAL']
|
|
||||||
$GGML_METAL ||= true
|
|
||||||
$DEPRECATE_WARNING ||= true
|
|
||||||
end
|
|
||||||
|
|
||||||
$UNAME_S = `uname -s`.chomp
|
|
||||||
$UNAME_P = `uname -p`.chomp
|
|
||||||
$UNAME_M = `uname -m`.chomp
|
|
||||||
|
|
||||||
if $UNAME_S == 'Darwin'
|
|
||||||
unless ENV['GGML_NO_METAL']
|
|
||||||
$GGML_METAL ||= true
|
|
||||||
end
|
|
||||||
$GGML_NO_OPENMP ||= true
|
|
||||||
end
|
|
||||||
|
|
||||||
if $GGML_METAL
|
|
||||||
$GGML_METAL_EMBED_LIBRARY = true
|
|
||||||
end
|
|
||||||
|
|
||||||
$MK_CPPFLAGS = ''
|
|
||||||
$MK_CFLAGS = '-std=c11 -fPIC'
|
|
||||||
$MK_CXXFLAGS = '-std=c++11 -fPIC'
|
|
||||||
$MK_NVCCFLAGS = '-std=c++11'
|
|
||||||
$MK_LDFLAGS = ''
|
|
||||||
|
|
||||||
$OBJ_GGML = []
|
|
||||||
$OBJ_WHISPER = []
|
|
||||||
$OBJ_COMMON = []
|
|
||||||
$OBJ_SDL = []
|
|
||||||
|
|
||||||
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
|
|
||||||
|
|
||||||
if $UNAME_S == 'Linux'
|
|
||||||
$MK_CPPFLAGS << ' -D_GNU_SOURCE'
|
|
||||||
end
|
|
||||||
|
|
||||||
if $UNAME_S == 'Darwin'
|
|
||||||
$MK_CPPFLAGS << ' -D_DARWIN_C_SOURCE'
|
|
||||||
end
|
|
||||||
|
|
||||||
if ENV['WHISPER_DEBUG']
|
|
||||||
$MK_CFLAGS << ' -O0 -g'
|
|
||||||
$MK_CXXFLAGS << ' -O0 -g'
|
|
||||||
$MK_LDFLAGS << ' -g'
|
|
||||||
$MK_NVCCFLAGS << ' -O0 -g'
|
|
||||||
else
|
|
||||||
$MK_CPPFLAGS << ' -DNDEBUG'
|
|
||||||
$MK_CFLAGS << ' -O3'
|
|
||||||
$MK_CXXFLAGS << ' -O3'
|
|
||||||
$MK_NVCCFLAGS << ' -O3'
|
|
||||||
end
|
|
||||||
|
|
||||||
$WARN_FLAGS =
|
|
||||||
' -Wall' <<
|
|
||||||
' -Wextra' <<
|
|
||||||
' -Wpedantic' <<
|
|
||||||
' -Wcast-qual' <<
|
|
||||||
' -Wno-unused-function'
|
|
||||||
|
|
||||||
$MK_CFLAGS <<
|
|
||||||
$WARN_FLAGS <<
|
|
||||||
' -Wshadow' <<
|
|
||||||
' -Wstrict-prototypes' <<
|
|
||||||
' -Wpointer-arith' <<
|
|
||||||
' -Wmissing-prototypes' <<
|
|
||||||
' -Werror=implicit-int' <<
|
|
||||||
' -Werror=implicit-function-declaration'
|
|
||||||
|
|
||||||
$MK_CXXFLAGS <<
|
|
||||||
$WARN_FLAGS <<
|
|
||||||
' -Wmissing-declarations' <<
|
|
||||||
' -Wmissing-noreturn'
|
|
||||||
|
|
||||||
unless `#{cc_command} #{$LDFLAGS} -Wl,-v 2>&1`.chomp.include? 'dyld-1015.7'
|
|
||||||
$MK_CPPFLAGS << ' -DHAVE_BUGGY_APPLE_LINKER'
|
|
||||||
end
|
|
||||||
|
|
||||||
if %w[Linux Darwin FreeBSD NetBSD OpenBSD Haiku].include? $UNAME_S
|
|
||||||
$MK_CFLAGS << ' -pthread'
|
|
||||||
$MK_CXXFLAGS << ' -pthread'
|
|
||||||
end
|
|
||||||
|
|
||||||
unless $_WIN32
|
|
||||||
$DSO_EXT = '.so'
|
|
||||||
else
|
|
||||||
$DSO_EXT = '.dll'
|
|
||||||
end
|
|
||||||
|
|
||||||
unless ENV['RISCV']
|
|
||||||
if %w[x86_64 i686 amd64].include? $UNAME_M
|
|
||||||
$HOST_CXXFLAGS ||= ''
|
|
||||||
|
|
||||||
$MK_CFLAGS << ' -march=native -mtune=native'
|
|
||||||
$HOST_CXXFLAGS << ' -march=native -mtune=native'
|
|
||||||
end
|
|
||||||
|
|
||||||
if $UNAME_M.match? /aarch64.*/
|
|
||||||
$MK_CFLAGS << ' -mcpu=native'
|
|
||||||
$MK_CXXFLAGS << ' -mcpu=native'
|
|
||||||
end
|
|
||||||
else
|
|
||||||
$MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
|
|
||||||
$MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
|
|
||||||
end
|
|
||||||
|
|
||||||
unless ENV['GGML_NO_ACCELERATE']
|
|
||||||
if $UNAME_S == 'Darwin'
|
|
||||||
$MK_CPPFLAGS << ' -DGGML_USE_ACCELERATE -DGGML_USE_BLAS'
|
|
||||||
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
|
|
||||||
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
|
|
||||||
$MK_LDFLAGS << ' -framework Accelerate'
|
|
||||||
$OBJ_GGML << 'ggml-blas.o'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if ENV['GGML_OPENBLAS']
|
|
||||||
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
|
|
||||||
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas)`.chomp}"
|
|
||||||
$MK_LDFLAGS << " #{`pkg-config --libs openblas`}"
|
|
||||||
$OBJ_GGML << 'ggml-blas.o'
|
|
||||||
end
|
|
||||||
|
|
||||||
if ENV['GGML_OPENBLAS64']
|
|
||||||
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
|
|
||||||
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64)`.chomp}"
|
|
||||||
$MK_LDFLAGS << " #{`pkg-config --libs openblas64`}"
|
|
||||||
$OBJ_GGML << 'ggml-blas.o'
|
|
||||||
end
|
|
||||||
|
|
||||||
if $GGML_METAL
|
|
||||||
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
|
|
||||||
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
|
||||||
$OBJ_GGML << 'ggml-metal.o'
|
|
||||||
|
|
||||||
if ENV['GGML_METAL_NDEBUG']
|
|
||||||
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
|
|
||||||
end
|
|
||||||
|
|
||||||
if $GGML_METAL_EMBED_LIBRARY
|
|
||||||
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
|
|
||||||
$OBJ_GGML << 'ggml-metal-embed.o'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
$OBJ_GGML <<
|
|
||||||
'ggml.o' <<
|
|
||||||
'ggml-cpu.o' <<
|
|
||||||
'ggml-alloc.o' <<
|
|
||||||
'ggml-backend.o' <<
|
|
||||||
'ggml-quants.o' <<
|
|
||||||
'ggml-aarch64.o'
|
|
||||||
|
|
||||||
$OBJ_WHISPER <<
|
|
||||||
'whisper.o'
|
|
||||||
|
|
||||||
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
|
||||||
$objs << "ruby_whisper.o"
|
|
||||||
|
|
||||||
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
|
||||||
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
|
||||||
$BASE_CXXFLAGS = "#{$MK_CXXFLAGS} #{$CXXFLAGS}"
|
|
||||||
$CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
|
|
||||||
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
|
|
||||||
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
|
|
||||||
|
|
||||||
create_makefile('whisper')
|
create_makefile('whisper')
|
||||||
|
|
||||||
File.open 'Makefile', 'a' do |file|
|
|
||||||
file.puts 'include get-flags.mk'
|
|
||||||
|
|
||||||
if $GGML_METAL
|
|
||||||
if $GGML_METAL_EMBED_LIBRARY
|
|
||||||
file.puts 'include metal-embed.mk'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
141
bindings/ruby/ext/ggml-backend-impl.h
Normal file
141
bindings/ruby/ext/ggml-backend-impl.h
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
// ggml-backend internal header
|
||||||
|
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend buffer
|
||||||
|
//
|
||||||
|
|
||||||
|
// buffer type
|
||||||
|
typedef void * ggml_backend_buffer_type_context_t;
|
||||||
|
|
||||||
|
struct ggml_backend_buffer_type_i {
|
||||||
|
const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft);
|
||||||
|
ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
|
size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
|
||||||
|
size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size
|
||||||
|
size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
|
||||||
|
bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
|
||||||
|
// check if tensor data is in host memory
|
||||||
|
// should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
|
||||||
|
bool (*GGML_CALL is_host) (ggml_backend_buffer_type_t buft);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_backend_buffer_type {
|
||||||
|
struct ggml_backend_buffer_type_i iface;
|
||||||
|
ggml_backend_buffer_type_context_t context;
|
||||||
|
};
|
||||||
|
|
||||||
|
// buffer
|
||||||
|
typedef void * ggml_backend_buffer_context_t;
|
||||||
|
|
||||||
|
struct ggml_backend_buffer_i {
|
||||||
|
const char * (*GGML_CALL get_name) (ggml_backend_buffer_t buffer);
|
||||||
|
void (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
|
||||||
|
void * (*GGML_CALL get_base) (ggml_backend_buffer_t buffer);
|
||||||
|
void (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
void (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
void (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
bool (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
|
||||||
|
void (*GGML_CALL clear) (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
|
void (*GGML_CALL reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_backend_buffer {
|
||||||
|
struct ggml_backend_buffer_i iface;
|
||||||
|
ggml_backend_buffer_type_t buft;
|
||||||
|
ggml_backend_buffer_context_t context;
|
||||||
|
size_t size;
|
||||||
|
enum ggml_backend_buffer_usage usage;
|
||||||
|
};
|
||||||
|
|
||||||
|
GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
|
||||||
|
ggml_backend_buffer_type_t buft,
|
||||||
|
struct ggml_backend_buffer_i iface,
|
||||||
|
ggml_backend_buffer_context_t context,
|
||||||
|
size_t size);
|
||||||
|
|
||||||
|
// do not use directly, use ggml_backend_tensor_copy instead
|
||||||
|
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// buffer that contains a collection of buffers
|
||||||
|
GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
|
||||||
|
GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
|
||||||
|
GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef void * ggml_backend_context_t;
|
||||||
|
|
||||||
|
struct ggml_backend_i {
|
||||||
|
const char * (*GGML_CALL get_name)(ggml_backend_t backend);
|
||||||
|
|
||||||
|
void (*GGML_CALL free)(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// buffer allocation
|
||||||
|
ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// (optional) asynchronous tensor data access
|
||||||
|
void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// (optional) complete all pending operations
|
||||||
|
void (*GGML_CALL synchronize)(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// compute graph with a plan (not used currently)
|
||||||
|
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
||||||
|
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
|
||||||
|
// compute graph with a plan
|
||||||
|
enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
// compute graph without a plan (async)
|
||||||
|
enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
|
||||||
|
// check if the backend supports an operation
|
||||||
|
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
|
||||||
|
// these should be expensive operations with large batch sizes that may benefit from running on this backend
|
||||||
|
// even if the weight has to be copied from the CPU temporarily
|
||||||
|
bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// (optional) event synchronization
|
||||||
|
ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
|
||||||
|
void (*GGML_CALL event_free) (ggml_backend_event_t event);
|
||||||
|
void (*GGML_CALL event_record) (ggml_backend_event_t event);
|
||||||
|
void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
|
||||||
|
void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_backend {
|
||||||
|
ggml_guid_t guid;
|
||||||
|
|
||||||
|
struct ggml_backend_i iface;
|
||||||
|
ggml_backend_context_t context;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_backend_event {
|
||||||
|
ggml_backend_t backend;
|
||||||
|
void * context;
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend registry
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data);
|
||||||
|
|
||||||
|
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
2095
bindings/ruby/ext/ggml-backend.c
Normal file
2095
bindings/ruby/ext/ggml-backend.c
Normal file
File diff suppressed because it is too large
Load Diff
233
bindings/ruby/ext/ggml-backend.h
Normal file
233
bindings/ruby/ext/ggml-backend.h
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||||
|
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||||
|
typedef struct ggml_backend_event * ggml_backend_event_t;
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
typedef void * ggml_backend_graph_plan_t;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend buffer
|
||||||
|
//
|
||||||
|
|
||||||
|
// buffer type
|
||||||
|
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
|
||||||
|
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
||||||
|
|
||||||
|
// buffer
|
||||||
|
enum ggml_backend_buffer_usage {
|
||||||
|
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
||||||
|
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
|
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
||||||
|
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
||||||
|
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
||||||
|
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
|
||||||
|
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// tensor copy between different backends
|
||||||
|
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// asynchronous copy
|
||||||
|
// the copy is performed after all the currently queued operations in backend_src
|
||||||
|
// backend_dst will wait for the copy to complete before performing other operations
|
||||||
|
// automatic fallback to sync copy if async is not supported
|
||||||
|
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// events
|
||||||
|
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
|
||||||
|
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
|
||||||
|
|
||||||
|
//
|
||||||
|
// CPU backend
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
|
||||||
|
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
||||||
|
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||||
|
|
||||||
|
// Create a backend buffer from an existing pointer
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef GGML_USE_CPU_HBM
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend registry
|
||||||
|
//
|
||||||
|
|
||||||
|
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
|
||||||
|
|
||||||
|
GGML_API size_t ggml_backend_reg_get_count(void);
|
||||||
|
GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
|
||||||
|
GGML_API const char * ggml_backend_reg_get_name(size_t i);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend scheduler
|
||||||
|
//
|
||||||
|
|
||||||
|
// The backend scheduler allows for multiple backends to be used together
|
||||||
|
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
||||||
|
// The backends are selected based on:
|
||||||
|
// - the backend that supports the operation
|
||||||
|
// - the location of the pre-allocated tensors (e.g. the weights)
|
||||||
|
/*
|
||||||
|
Example usage:
|
||||||
|
|
||||||
|
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
||||||
|
// preferably to run on the same backend as the buffer
|
||||||
|
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||||
|
|
||||||
|
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
|
||||||
|
|
||||||
|
// initialize buffers from a max size graph (optional)
|
||||||
|
reserve_graph = build_graph(sched, max_batch_size);
|
||||||
|
|
||||||
|
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||||
|
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||||
|
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
|
||||||
|
|
||||||
|
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||||
|
|
||||||
|
// compute
|
||||||
|
graph = build_graph(sched);
|
||||||
|
ggml_backend_sched_graph_compute(sched, graph);
|
||||||
|
|
||||||
|
// if there are graph inputs:
|
||||||
|
ggml_backend_sched_reset(sched);
|
||||||
|
ggml_backend_sched_alloc_graph(sched, graph);
|
||||||
|
ggml_backend_tensor_set(input_tensor, ...);
|
||||||
|
ggml_backend_sched_graph_compute(sched, graph);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct ggml_backend_sched;
|
||||||
|
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||||
|
|
||||||
|
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
||||||
|
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
||||||
|
//
|
||||||
|
// when ask == false, the scheduler is passing the node tensor to the user for observation
|
||||||
|
// if the user returns false, the scheduler will cancel the graph compute
|
||||||
|
//
|
||||||
|
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||||
|
|
||||||
|
// Initialize a backend scheduler
|
||||||
|
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||||
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Initialize backend buffers from a measure graph
|
||||||
|
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||||
|
|
||||||
|
// Get the number of splits of the last graph
|
||||||
|
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||||
|
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||||
|
|
||||||
|
// Allocate and compute graph on the backend scheduler
|
||||||
|
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Reset all assignments and allocators - must be called before changing the node backends
|
||||||
|
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Set a callback to be called for each resulting node during graph compute
|
||||||
|
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Utils
|
||||||
|
//
|
||||||
|
|
||||||
|
struct ggml_backend_graph_copy {
|
||||||
|
ggml_backend_buffer_t buffer;
|
||||||
|
struct ggml_context * ctx_allocated;
|
||||||
|
struct ggml_context * ctx_unallocated;
|
||||||
|
struct ggml_cgraph * graph;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Copy a graph to a different backend
|
||||||
|
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
||||||
|
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
||||||
|
|
||||||
|
typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||||
|
|
||||||
|
// Compare the output of two backends
|
||||||
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
||||||
|
|
||||||
|
// Tensor initialization
|
||||||
|
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||||
|
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
1853
bindings/ruby/ext/ggml-common.h
Normal file
1853
bindings/ruby/ext/ggml-common.h
Normal file
File diff suppressed because it is too large
Load Diff
43
bindings/ruby/ext/ggml-cuda.h
Normal file
43
bindings/ruby/ext/ggml-cuda.h
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef GGML_USE_HIPBLAS
|
||||||
|
#define GGML_CUDA_NAME "ROCm"
|
||||||
|
#define GGML_CUBLAS_NAME "hipBLAS"
|
||||||
|
#else
|
||||||
|
#define GGML_CUDA_NAME "CUDA"
|
||||||
|
#define GGML_CUBLAS_NAME "cuBLAS"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_CUDA_MAX_DEVICES 16
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// device buffer
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
272
bindings/ruby/ext/ggml-impl.h
Normal file
272
bindings/ruby/ext/ggml-impl.h
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
|
// GGML internal header
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <string.h> // memcpy
|
||||||
|
#include <math.h> // fabsf
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// static_assert should be a #define, but if it's not,
|
||||||
|
// fall back to the _Static_assert C11 keyword.
|
||||||
|
// if C99 - static_assert is noop
|
||||||
|
// ref: https://stackoverflow.com/a/53923785/4039976
|
||||||
|
#ifndef __cplusplus
|
||||||
|
#ifndef static_assert
|
||||||
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
|
||||||
|
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
||||||
|
#else
|
||||||
|
#define static_assert(cond, msg) struct global_scope_noop_trick
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
||||||
|
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
|
||||||
|
#ifndef __FMA__
|
||||||
|
#define __FMA__
|
||||||
|
#endif
|
||||||
|
#ifndef __F16C__
|
||||||
|
#define __F16C__
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
|
||||||
|
#if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
|
||||||
|
#ifndef __SSE3__
|
||||||
|
#define __SSE3__
|
||||||
|
#endif
|
||||||
|
#ifndef __SSSE3__
|
||||||
|
#define __SSSE3__
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// 16-bit float
|
||||||
|
// on Arm, we use __fp16
|
||||||
|
// on x86, we use uint16_t
|
||||||
|
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||||
|
|
||||||
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
||||||
|
//
|
||||||
|
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
||||||
|
//
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
typedef __fp16 ggml_fp16_internal_t;
|
||||||
|
|
||||||
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
|
||||||
|
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
|
||||||
|
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||||
|
ggml_fp16_internal_t tmp;
|
||||||
|
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
||||||
|
return (float)tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||||
|
ggml_fp16_t res;
|
||||||
|
ggml_fp16_internal_t tmp = f;
|
||||||
|
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
typedef uint16_t ggml_fp16_internal_t;
|
||||||
|
|
||||||
|
#ifdef __wasm_simd128__
|
||||||
|
#include <wasm_simd128.h>
|
||||||
|
#else
|
||||||
|
#ifdef __POWER9_VECTOR__
|
||||||
|
#include <altivec.h>
|
||||||
|
#undef bool
|
||||||
|
#define bool _Bool
|
||||||
|
#else
|
||||||
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
|
||||||
|
#if !defined(__riscv)
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __riscv_v_intrinsic
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __F16C__
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
|
||||||
|
#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
|
||||||
|
#else
|
||||||
|
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
|
||||||
|
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#elif defined(__POWER9_VECTOR__)
|
||||||
|
|
||||||
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
/* the inline asm below is about 12% faster than the lookup method */
|
||||||
|
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
||||||
|
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||||
|
|
||||||
|
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||||
|
register float f;
|
||||||
|
register double d;
|
||||||
|
__asm__(
|
||||||
|
"mtfprd %0,%2\n"
|
||||||
|
"xscvhpdp %0,%0\n"
|
||||||
|
"frsp %1,%0\n" :
|
||||||
|
/* temp */ "=d"(d),
|
||||||
|
/* out */ "=f"(f):
|
||||||
|
/* in */ "r"(h));
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||||
|
register double d;
|
||||||
|
register ggml_fp16_t r;
|
||||||
|
__asm__( /* xscvdphp can work on double or single precision */
|
||||||
|
"xscvdphp %0,%2\n"
|
||||||
|
"mffprd %1,%0\n" :
|
||||||
|
/* temp */ "=d"(d),
|
||||||
|
/* out */ "=r"(r):
|
||||||
|
/* in */ "f"(f));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// FP16 <-> FP32
|
||||||
|
// ref: https://github.com/Maratyszcza/FP16
|
||||||
|
|
||||||
|
// Reinterpret a 32-bit pattern as a float: the bits are copied unchanged,
// no numeric conversion takes place.
static inline float fp32_from_bits(uint32_t w) {
    float value;
    memcpy(&value, &w, sizeof(value));
    return value;
}
|
||||||
|
|
||||||
|
// Return the raw 32-bit pattern of a float: the bits are copied unchanged,
// no numeric conversion takes place.
static inline uint32_t fp32_to_bits(float f) {
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
|
||||||
|
|
||||||
|
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||||
|
const uint32_t w = (uint32_t) h << 16;
|
||||||
|
const uint32_t sign = w & UINT32_C(0x80000000);
|
||||||
|
const uint32_t two_w = w + w;
|
||||||
|
|
||||||
|
const uint32_t exp_offset = UINT32_C(0xE0) << 23;
|
||||||
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||||
|
const float exp_scale = 0x1.0p-112f;
|
||||||
|
#else
|
||||||
|
const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
|
||||||
|
#endif
|
||||||
|
const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
|
||||||
|
|
||||||
|
const uint32_t magic_mask = UINT32_C(126) << 23;
|
||||||
|
const float magic_bias = 0.5f;
|
||||||
|
const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
|
||||||
|
|
||||||
|
const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
|
||||||
|
const uint32_t result = sign |
|
||||||
|
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
|
||||||
|
return fp32_from_bits(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Software conversion of a float to a 16-bit half-precision bit pattern.
//
// The magnitude is rescaled with two float multiplications, then added to a
// bias value built from the (clamped) input exponent. The half exponent and
// mantissa fields are read out of the resulting float's bit pattern. NaN
// inputs are mapped to the fixed pattern 0x7E00; the sign bit is carried
// over unchanged.
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float scale_up   = 0x1.0p+112f;
    const float scale_down = 0x1.0p-110f;
#else
    // Same 2^112 and 2^-110 factors, spelled as bit patterns for compilers
    // without hex float literals.
    const float scale_up   = fp32_from_bits(UINT32_C(0x77800000));
    const float scale_down = fp32_from_bits(UINT32_C(0x08800000));
#endif
    // Magnitudes of 2^16 and above overflow to infinity in the first multiply.
    const float scaled = (fabsf(f) * scale_up) * scale_down;

    const uint32_t in_bits  = fp32_to_bits(f);
    const uint32_t sign_bit = in_bits & UINT32_C(0x80000000);
    // Doubling drops the sign bit and leaves the 8-bit exponent in the top byte.
    const uint32_t doubled  = in_bits + in_bits;

    // Exponent byte of the input, clamped from below to 0x71.
    const uint32_t exponent_byte = doubled & UINT32_C(0xFF000000);
    const uint32_t min_exponent  = UINT32_C(0x71000000);
    const uint32_t bias = exponent_byte < min_exponent ? min_exponent : exponent_byte;

    const float    rounded  = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + scaled;
    const uint32_t out_bits = fp32_to_bits(rounded);

    const uint32_t exponent_field = (out_bits >> 13) & UINT32_C(0x00007C00);
    const uint32_t mantissa_field = out_bits & UINT32_C(0x00000FFF);

    uint32_t magnitude = exponent_field + mantissa_field;
    // doubled > 0xFF000000 means exponent all ones with a non-zero mantissa, i.e. NaN.
    if (doubled > UINT32_C(0xFF000000)) {
        magnitude = UINT16_C(0x7E00);
    }

    return (sign_bit >> 16) | magnitude;
}
|
||||||
|
|
||||||
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
|
||||||
|
#endif // __F16C__
|
||||||
|
|
||||||
|
#endif // __ARM_NEON
|
||||||
|
|
||||||
|
// precomputed f32 table for f16 (256 KB)
|
||||||
|
// defined in ggml.c, initialized in ggml_init()
|
||||||
|
extern float ggml_table_f32_f16[1 << 16];
|
||||||
|
|
||||||
|
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
||||||
|
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
||||||
|
// This is also true for POWER9.
|
||||||
|
#if !defined(GGML_FP16_TO_FP32)
|
||||||
|
// Table-based half -> float conversion: the raw 16 bits of `f` are used as an
// index into ggml_table_f32_f16.
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
    uint16_t table_index;
    // memcpy copies the bit pattern without converting the value.
    memcpy(&table_index, &f, sizeof table_index);
    return ggml_table_f32_f16[table_index];
}
|
||||||
|
|
||||||
|
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(GGML_FP32_TO_FP16)
|
||||||
|
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_HASHTABLE_FULL ((size_t)-1)
|
||||||
|
#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
|
||||||
|
|
||||||
|
struct ggml_hash_set ggml_hash_set_new(size_t size);
|
||||||
|
|
||||||
|
bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
|
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
|
||||||
|
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
|
// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
|
||||||
|
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
|
// returns the index, asserts if table is full
|
||||||
|
size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
46
bindings/ruby/ext/ggml-kompute.h
Normal file
46
bindings/ruby/ext/ggml-kompute.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Description of one Vulkan device. This is the element type returned by
// ggml_vk_available_devices() and ggml_vk_current_device(), and the type of the
// out-parameter of ggml_vk_get_device().
// NOTE(review): units and exact meaning of the numeric fields are not visible
// in this header -- confirm against the Kompute backend implementation.
struct ggml_vk_device {
|
||||||
|
int index;
|
||||||
|
int type; // same as VkPhysicalDeviceType
|
||||||
|
// presumably the size of the device memory heap in bytes -- TODO confirm
size_t heapSize;
|
||||||
|
// NOTE(review): ownership/lifetime of the name and vendor strings is not stated here
const char * name;
|
||||||
|
const char * vendor;
|
||||||
|
int subgroupSize;
|
||||||
|
uint64_t bufferAlignment;
|
||||||
|
// presumably the largest single allocation the device accepts -- TODO confirm
uint64_t maxAlloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
|
||||||
|
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
|
||||||
|
bool ggml_vk_has_vulkan(void);
|
||||||
|
bool ggml_vk_has_device(void);
|
||||||
|
struct ggml_vk_device ggml_vk_current_device(void);
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
//
|
||||||
|
|
||||||
|
// forward declaration
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
|
||||||
|
GGML_API ggml_backend_t ggml_backend_kompute_init(int device);
|
||||||
|
|
||||||
|
GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
66
bindings/ruby/ext/ggml-metal.h
Normal file
66
bindings/ruby/ext/ggml-metal.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// An interface allowing to compute ggml_cgraph with Metal
|
||||||
|
//
|
||||||
|
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
||||||
|
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
|
||||||
|
//
|
||||||
|
// How does it work?
|
||||||
|
//
|
||||||
|
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
|
||||||
|
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
|
||||||
|
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
|
||||||
|
//
|
||||||
|
// You only need to make sure that all memory buffers that you used during the graph creation
|
||||||
|
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
|
||||||
|
// used during the graph evaluation to determine the arguments of the compute kernels.
|
||||||
|
//
|
||||||
|
// Synchronization between device and host memory (for example for input and output tensors)
|
||||||
|
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
// max memory buffers that can be mapped to the device
|
||||||
|
#define GGML_METAL_MAX_BUFFERS 64
|
||||||
|
|
||||||
|
struct ggml_tensor;
|
||||||
|
struct ggml_cgraph;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
// user-code should use only these functions
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
||||||
|
|
||||||
|
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
||||||
|
|
||||||
|
// helper to check if the device supports a specific family
|
||||||
|
// ideally, the user code should be doing these checks
|
||||||
|
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||||
|
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
||||||
|
|
||||||
|
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
|
||||||
|
GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
36
bindings/ruby/ext/ggml-opencl.h
Normal file
36
bindings/ruby/ext/ggml-opencl.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_init(void);
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
|
GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
|
GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
|
||||||
|
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
|
GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
||||||
|
|
||||||
|
// GGML_API void * ggml_cl_host_malloc(size_t size);
|
||||||
|
// GGML_API void ggml_cl_host_free(void * ptr);
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
|
||||||
|
// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
|
||||||
|
|
||||||
|
// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
||||||
|
// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
12678
bindings/ruby/ext/ggml-quants.c
Normal file
12678
bindings/ruby/ext/ggml-quants.c
Normal file
File diff suppressed because it is too large
Load Diff
133
bindings/ruby/ext/ggml-quants.h
Normal file
133
bindings/ruby/ext/ggml-quants.h
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#define GGML_COMMON_DECL_C
|
||||||
|
#include "ggml-common.h"
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
|
// GGML internal header
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Quantization
|
||||||
|
void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq3_s_reference (const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq2_s_reference (const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
// Dequantization
|
||||||
|
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq1_s (const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq1_m (const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq4_nl (const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq4_xs (const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq3_s (const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
// Dot product
|
||||||
|
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
|
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
|
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
|
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
||||||
|
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq1_m (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
|
||||||
|
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
|
||||||
|
void iq2xs_init_impl(enum ggml_type type);
|
||||||
|
void iq2xs_free_impl(enum ggml_type type);
|
||||||
|
void iq3xs_init_impl(int grid_size);
|
||||||
|
void iq3xs_free_impl(int grid_size);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
49
bindings/ruby/ext/ggml-sycl.h
Normal file
49
bindings/ruby/ext/ggml-sycl.h
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
//
|
||||||
|
// MIT license
|
||||||
|
// Copyright (C) 2024 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_SYCL_MAX_DEVICES 48
|
||||||
|
#define GGML_SYCL_NAME "SYCL"
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||||
|
|
||||||
|
// device buffer
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||||
|
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
||||||
|
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
||||||
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
||||||
|
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||||
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||||
|
|
||||||
|
// TODO: these are temporary
|
||||||
|
// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
|
||||||
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
||||||
|
|
||||||
|
// SYCL doesn't support registering host memory, keep here for reference
|
||||||
|
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||||
|
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
29
bindings/ruby/ext/ggml-vulkan.h
Normal file
29
bindings/ruby/ext/ggml-vulkan.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_VK_NAME "Vulkan"
|
||||||
|
#define GGML_VK_MAX_DEVICES 16
|
||||||
|
|
||||||
|
GGML_API void ggml_vk_instance_init(void);
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
|
||||||
|
GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
@ -1,14 +0,0 @@
|
|||||||
ggml-metal-embed.o: \
|
|
||||||
ggml-metal.metal \
|
|
||||||
ggml-common.h
|
|
||||||
@echo "Embedding Metal library"
|
|
||||||
@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
|
|
||||||
$(eval TEMP_ASSEMBLY=$(shell mktemp))
|
|
||||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
|
|
||||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
|
|
||||||
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
|
|
||||||
@echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
|
|
||||||
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
|
|
||||||
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
|
|
||||||
@$(AS) $(TEMP_ASSEMBLY) -o $@
|
|
||||||
@rm -f ${TEMP_ASSEMBLY}
|
|
File diff suppressed because it is too large
Load Diff
@ -3,13 +3,6 @@
|
|||||||
|
|
||||||
#include "whisper.h"
|
#include "whisper.h"
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
VALUE *context;
|
|
||||||
VALUE user_data;
|
|
||||||
VALUE callback;
|
|
||||||
VALUE callbacks;
|
|
||||||
} ruby_whisper_callback_container;
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct whisper_context *context;
|
struct whisper_context *context;
|
||||||
} ruby_whisper;
|
} ruby_whisper;
|
||||||
@ -17,9 +10,6 @@ typedef struct {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
struct whisper_full_params params;
|
struct whisper_full_params params;
|
||||||
bool diarize;
|
bool diarize;
|
||||||
ruby_whisper_callback_container *new_segment_callback_container;
|
|
||||||
ruby_whisper_callback_container *progress_callback_container;
|
|
||||||
ruby_whisper_callback_container *abort_callback_container;
|
|
||||||
} ruby_whisper_params;
|
} ruby_whisper_params;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,31 +0,0 @@
|
|||||||
---
|
|
||||||
- ../../src/whisper.cpp
|
|
||||||
- ../../include/whisper.h
|
|
||||||
- ../../ggml/src/ggml.c
|
|
||||||
- ../../ggml/src/ggml-cpu.c
|
|
||||||
- ../../ggml/src/ggml-impl.h
|
|
||||||
- ../../ggml/src/ggml-aarch64.h
|
|
||||||
- ../../ggml/src/ggml-aarch64.c
|
|
||||||
- ../../ggml/src/ggml-alloc.c
|
|
||||||
- ../../ggml/src/ggml-backend-impl.h
|
|
||||||
- ../../ggml/src/ggml-backend.cpp
|
|
||||||
- ../../ggml/src/ggml-common.h
|
|
||||||
- ../../ggml/src/ggml-quants.h
|
|
||||||
- ../../ggml/src/ggml-quants.c
|
|
||||||
- ../../ggml/src/ggml-cpu-impl.h
|
|
||||||
- ../../ggml/src/ggml-metal.m
|
|
||||||
- ../../ggml/src/ggml-metal.metal
|
|
||||||
- ../../ggml/src/ggml-blas.cpp
|
|
||||||
- ../../ggml/include/ggml.h
|
|
||||||
- ../../ggml/include/ggml-alloc.h
|
|
||||||
- ../../ggml/include/ggml-backend.h
|
|
||||||
- ../../ggml/include/ggml-cpu.h
|
|
||||||
- ../../ggml/include/ggml-cuda.h
|
|
||||||
- ../../ggml/include/ggml-kompute.h
|
|
||||||
- ../../ggml/include/ggml-metal.h
|
|
||||||
- ../../ggml/include/ggml-sycl.h
|
|
||||||
- ../../ggml/include/ggml-vulkan.h
|
|
||||||
- ../../ggml/include/ggml-blas.h
|
|
||||||
- ../../scripts/get-flags.mk
|
|
||||||
- ../../examples/dr_wav.h
|
|
||||||
- ../../LICENSE
|
|
@ -1,7 +0,0 @@
|
|||||||
require "test/unit"
|
|
||||||
require "whisper"
|
|
||||||
|
|
||||||
# Common base class for the binding tests; holds the paths of the model and
# audio sample, both located three directories above this file.
class TestBase < Test::Unit::TestCase
  MODEL = File.join(__dir__, *%w[.. .. .. models ggml-base.en.bin])
  AUDIO = File.join(__dir__, *%w[.. .. .. samples jfk.wav])
end
|
|
@ -1,163 +0,0 @@
|
|||||||
require "test/unit"
|
|
||||||
require "whisper"
|
|
||||||
|
|
||||||
class TestCallback < Test::Unit::TestCase
|
|
||||||
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
|
||||||
|
|
||||||
# Runs a GC pass, then builds the params object, the model context and the
# sample audio path used by the tests in this class.
def setup
  GC.start
  repo_root = File.join(TOPDIR, '..', '..')
  @params = Whisper::Params.new
  @whisper = Whisper::Context.new(File.join(repo_root, 'models', 'ggml-base.en.bin'))
  @audio = File.join(repo_root, 'samples', 'jfk.wav')
end
|
|
||||||
|
|
||||||
# Verifies the arguments handed to new_segment_callback and that every newly
# reported segment can be read back from the context with sane timestamps.
def test_new_segment_callback
  @params.new_segment_callback = ->(context, state, n_new, user_data) {
    assert_kind_of Integer, n_new
    assert n_new > 0
    assert_same @whisper, context

    n_segments = context.full_n_segments
    # The n_new freshly added segments are the last n_new entries, so the first
    # of them sits at n_segments - n_new. (Starting at n_segments - 1 is only
    # correct when exactly one segment was added and otherwise runs past the end.)
    first_new = n_segments - n_new
    n_new.times do |i|
      i_segment = first_new + i
      start_time = context.full_get_segment_t0(i_segment) * 10
      end_time = context.full_get_segment_t1(i_segment) * 10
      text = context.full_get_segment_text(i_segment)

      assert_kind_of Integer, start_time
      assert start_time >= 0
      assert_kind_of Integer, end_time
      assert end_time > 0
      # Only the very first segment of the sample is checked against the expected text.
      assert_match(/ask not what your country can do for you, ask what you can do for your country/, text) if i_segment == 0
    end
  }

  @whisper.transcribe(@audio, @params)
end
|
|
||||||
|
|
||||||
# Verifies that the callback can close over a local variable and that an
# exception raised inside it propagates out of transcribe.
def test_new_segment_callback_closure
  search_word = "what"
  @params.new_segment_callback = ->(context, state, n_new, user_data) {
    n_segments = context.full_n_segments
    # The n_new freshly added segments are the last n_new entries, so the first
    # of them sits at n_segments - n_new. (Starting at n_segments - 1 is only
    # correct when exactly one segment was added and otherwise runs past the end.)
    first_new = n_segments - n_new
    n_new.times do |i|
      i_segment = first_new + i
      text = context.full_get_segment_text(i_segment)
      if text.include?(search_word)
        t0 = context.full_get_segment_t0(i_segment)
        t1 = context.full_get_segment_t1(i_segment)
        raise "search word '#{search_word}' found at between #{t0} and #{t1}"
      end
    end
  }

  assert_raise RuntimeError do
    @whisper.transcribe(@audio, @params)
  end
end
|
|
||||||
|
|
||||||
def test_new_segment_callback_user_data
|
|
||||||
udata = Object.new
|
|
||||||
@params.new_segment_callback_user_data = udata
|
|
||||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
|
||||||
assert_same udata, user_data
|
|
||||||
}
|
|
||||||
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_new_segment_callback_user_data_gc
|
|
||||||
@params.new_segment_callback_user_data = "My user data"
|
|
||||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
|
||||||
assert_equal "My user data", user_data
|
|
||||||
}
|
|
||||||
GC.start
|
|
||||||
|
|
||||||
assert_same @whisper, @whisper.transcribe(@audio, @params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_progress_callback
|
|
||||||
first = nil
|
|
||||||
last = nil
|
|
||||||
@params.progress_callback = ->(context, state, progress, user_data) {
|
|
||||||
assert_kind_of Integer, progress
|
|
||||||
assert 0 <= progress && progress <= 100
|
|
||||||
assert_same @whisper, context
|
|
||||||
first = progress if first.nil?
|
|
||||||
last = progress
|
|
||||||
}
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
assert_equal 0, first
|
|
||||||
assert_equal 100, last
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_progress_callback_user_data
|
|
||||||
udata = Object.new
|
|
||||||
@params.progress_callback_user_data = udata
|
|
||||||
@params.progress_callback = ->(context, state, n_new, user_data) {
|
|
||||||
assert_same udata, user_data
|
|
||||||
}
|
|
||||||
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_on_progress
|
|
||||||
first = nil
|
|
||||||
last = nil
|
|
||||||
@params.on_progress do |progress|
|
|
||||||
assert_kind_of Integer, progress
|
|
||||||
assert 0 <= progress && progress <= 100
|
|
||||||
first = progress if first.nil?
|
|
||||||
last = progress
|
|
||||||
end
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
assert_equal 0, first
|
|
||||||
assert_equal 100, last
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_abort_callback
|
|
||||||
i = 0
|
|
||||||
@params.abort_callback = ->(user_data) {
|
|
||||||
assert_nil user_data
|
|
||||||
i += 1
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
assert i > 0
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_abort_callback_abort
|
|
||||||
i = 0
|
|
||||||
@params.abort_callback = ->(user_data) {
|
|
||||||
i += 1
|
|
||||||
return i == 3
|
|
||||||
}
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
assert_equal 3, i
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_abort_callback_user_data
|
|
||||||
udata = Object.new
|
|
||||||
@params.abort_callback_user_data = udata
|
|
||||||
yielded = nil
|
|
||||||
@params.abort_callback = ->(user_data) {
|
|
||||||
yielded = user_data
|
|
||||||
}
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
assert_same udata, yielded
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_abort_on
|
|
||||||
do_abort = false
|
|
||||||
aborted_from_callback = false
|
|
||||||
@params.on_new_segment do |segment|
|
|
||||||
do_abort = true if segment.text.match? /ask/
|
|
||||||
end
|
|
||||||
i = 0
|
|
||||||
@params.abort_on do
|
|
||||||
i += 1
|
|
||||||
do_abort
|
|
||||||
end
|
|
||||||
@whisper.transcribe(@audio, @params)
|
|
||||||
assert i > 0
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,44 +0,0 @@
|
|||||||
require_relative "helper"
|
|
||||||
|
|
||||||
class TestModel < TestBase
|
|
||||||
def test_model
|
|
||||||
whisper = Whisper::Context.new(MODEL)
|
|
||||||
assert_instance_of Whisper::Model, whisper.model
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_attributes
|
|
||||||
whisper = Whisper::Context.new(MODEL)
|
|
||||||
model = whisper.model
|
|
||||||
|
|
||||||
assert_equal 51864, model.n_vocab
|
|
||||||
assert_equal 1500, model.n_audio_ctx
|
|
||||||
assert_equal 512, model.n_audio_state
|
|
||||||
assert_equal 8, model.n_audio_head
|
|
||||||
assert_equal 6, model.n_audio_layer
|
|
||||||
assert_equal 448, model.n_text_ctx
|
|
||||||
assert_equal 512, model.n_text_state
|
|
||||||
assert_equal 8, model.n_text_head
|
|
||||||
assert_equal 6, model.n_text_layer
|
|
||||||
assert_equal 80, model.n_mels
|
|
||||||
assert_equal 1, model.ftype
|
|
||||||
assert_equal "base", model.type
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_gc
|
|
||||||
model = Whisper::Context.new(MODEL).model
|
|
||||||
GC.start
|
|
||||||
|
|
||||||
assert_equal 51864, model.n_vocab
|
|
||||||
assert_equal 1500, model.n_audio_ctx
|
|
||||||
assert_equal 512, model.n_audio_state
|
|
||||||
assert_equal 8, model.n_audio_head
|
|
||||||
assert_equal 6, model.n_audio_layer
|
|
||||||
assert_equal 448, model.n_text_ctx
|
|
||||||
assert_equal 512, model.n_text_state
|
|
||||||
assert_equal 8, model.n_text_head
|
|
||||||
assert_equal 6, model.n_text_layer
|
|
||||||
assert_equal 80, model.n_mels
|
|
||||||
assert_equal 1, model.ftype
|
|
||||||
assert_equal "base", model.type
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,31 +0,0 @@
|
|||||||
require_relative "helper"
|
|
||||||
require 'tempfile'
|
|
||||||
require 'tmpdir'
|
|
||||||
require 'shellwords'
|
|
||||||
|
|
||||||
class TestPackage < TestBase
|
|
||||||
def test_build
|
|
||||||
Tempfile.create do |file|
|
|
||||||
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
|
|
||||||
assert file.size > 0
|
|
||||||
assert_path_exist file.to_path
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
sub_test_case "Building binary on installation" do
|
|
||||||
def setup
|
|
||||||
system "rake", "build", exception: true
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_install
|
|
||||||
match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
|
|
||||||
filename = match_data[1]
|
|
||||||
version = match_data[2]
|
|
||||||
basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
|
|
||||||
Dir.mktmpdir do |dir|
|
|
||||||
system "gem", "install", "--install-dir", dir.shellescape, "pkg/#{filename.shellescape}", exception: true
|
|
||||||
assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,154 +0,0 @@
|
|||||||
require_relative "helper"
|
|
||||||
|
|
||||||
class TestParams < TestBase
|
|
||||||
def setup
|
|
||||||
@params = Whisper::Params.new
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_language
|
|
||||||
@params.language = "en"
|
|
||||||
assert_equal @params.language, "en"
|
|
||||||
@params.language = "auto"
|
|
||||||
assert_equal @params.language, "auto"
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_offset
|
|
||||||
@params.offset = 10_000
|
|
||||||
assert_equal @params.offset, 10_000
|
|
||||||
@params.offset = 0
|
|
||||||
assert_equal @params.offset, 0
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_duration
|
|
||||||
@params.duration = 60_000
|
|
||||||
assert_equal @params.duration, 60_000
|
|
||||||
@params.duration = 0
|
|
||||||
assert_equal @params.duration, 0
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_max_text_tokens
|
|
||||||
@params.max_text_tokens = 300
|
|
||||||
assert_equal @params.max_text_tokens, 300
|
|
||||||
@params.max_text_tokens = 0
|
|
||||||
assert_equal @params.max_text_tokens, 0
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_translate
|
|
||||||
@params.translate = true
|
|
||||||
assert @params.translate
|
|
||||||
@params.translate = false
|
|
||||||
assert !@params.translate
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_no_context
|
|
||||||
@params.no_context = true
|
|
||||||
assert @params.no_context
|
|
||||||
@params.no_context = false
|
|
||||||
assert !@params.no_context
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_single_segment
|
|
||||||
@params.single_segment = true
|
|
||||||
assert @params.single_segment
|
|
||||||
@params.single_segment = false
|
|
||||||
assert !@params.single_segment
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_print_special
|
|
||||||
@params.print_special = true
|
|
||||||
assert @params.print_special
|
|
||||||
@params.print_special = false
|
|
||||||
assert !@params.print_special
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_print_progress
|
|
||||||
@params.print_progress = true
|
|
||||||
assert @params.print_progress
|
|
||||||
@params.print_progress = false
|
|
||||||
assert !@params.print_progress
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_print_realtime
|
|
||||||
@params.print_realtime = true
|
|
||||||
assert @params.print_realtime
|
|
||||||
@params.print_realtime = false
|
|
||||||
assert !@params.print_realtime
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_print_timestamps
|
|
||||||
@params.print_timestamps = true
|
|
||||||
assert @params.print_timestamps
|
|
||||||
@params.print_timestamps = false
|
|
||||||
assert !@params.print_timestamps
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_suppress_blank
|
|
||||||
@params.suppress_blank = true
|
|
||||||
assert @params.suppress_blank
|
|
||||||
@params.suppress_blank = false
|
|
||||||
assert !@params.suppress_blank
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_suppress_non_speech_tokens
|
|
||||||
@params.suppress_non_speech_tokens = true
|
|
||||||
assert @params.suppress_non_speech_tokens
|
|
||||||
@params.suppress_non_speech_tokens = false
|
|
||||||
assert !@params.suppress_non_speech_tokens
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_token_timestamps
|
|
||||||
@params.token_timestamps = true
|
|
||||||
assert @params.token_timestamps
|
|
||||||
@params.token_timestamps = false
|
|
||||||
assert !@params.token_timestamps
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_split_on_word
|
|
||||||
@params.split_on_word = true
|
|
||||||
assert @params.split_on_word
|
|
||||||
@params.split_on_word = false
|
|
||||||
assert !@params.split_on_word
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_initial_prompt
|
|
||||||
assert_nil @params.initial_prompt
|
|
||||||
@params.initial_prompt = "You are a polite person."
|
|
||||||
assert_equal "You are a polite person.", @params.initial_prompt
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_temperature
|
|
||||||
assert_equal 0.0, @params.temperature
|
|
||||||
@params.temperature = 0.5
|
|
||||||
assert_equal 0.5, @params.temperature
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_max_initial_ts
|
|
||||||
assert_equal 1.0, @params.max_initial_ts
|
|
||||||
@params.max_initial_ts = 600.0
|
|
||||||
assert_equal 600.0, @params.max_initial_ts
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_length_penalty
|
|
||||||
assert_equal -1.0, @params.length_penalty
|
|
||||||
@params.length_penalty = 0.5
|
|
||||||
assert_equal 0.5, @params.length_penalty
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_temperature_inc
|
|
||||||
assert_in_delta 0.2, @params.temperature_inc
|
|
||||||
@params.temperature_inc = 0.5
|
|
||||||
assert_in_delta 0.5, @params.temperature_inc
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_entropy_thold
|
|
||||||
assert_in_delta 2.4, @params.entropy_thold
|
|
||||||
@params.entropy_thold = 3.0
|
|
||||||
assert_in_delta 3.0, @params.entropy_thold
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_logprob_thold
|
|
||||||
assert_in_delta -1.0, @params.logprob_thold
|
|
||||||
@params.logprob_thold = -0.5
|
|
||||||
assert_in_delta -0.5, @params.logprob_thold
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,83 +0,0 @@
|
|||||||
require_relative "helper"
|
|
||||||
|
|
||||||
class TestSegment < TestBase
|
|
||||||
class << self
|
|
||||||
attr_reader :whisper
|
|
||||||
|
|
||||||
def startup
|
|
||||||
@whisper = Whisper::Context.new(TestBase::MODEL)
|
|
||||||
params = Whisper::Params.new
|
|
||||||
params.print_timestamps = false
|
|
||||||
@whisper.transcribe(TestBase::AUDIO, params)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_iteration
|
|
||||||
whisper.each_segment do |segment|
|
|
||||||
assert_instance_of Whisper::Segment, segment
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_enumerator
|
|
||||||
enum = whisper.each_segment
|
|
||||||
assert_instance_of Enumerator, enum
|
|
||||||
enum.to_a.each_with_index do |segment, index|
|
|
||||||
assert_instance_of Whisper::Segment, segment
|
|
||||||
assert_kind_of Integer, index
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_start_time
|
|
||||||
i = 0
|
|
||||||
whisper.each_segment do |segment|
|
|
||||||
assert_equal 0, segment.start_time if i == 0
|
|
||||||
i += 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_end_time
|
|
||||||
i = 0
|
|
||||||
whisper.each_segment do |segment|
|
|
||||||
assert_equal whisper.full_get_segment_t1(i) * 10, segment.end_time
|
|
||||||
i += 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_on_new_segment
|
|
||||||
params = Whisper::Params.new
|
|
||||||
seg = nil
|
|
||||||
index = 0
|
|
||||||
params.on_new_segment do |segment|
|
|
||||||
assert_instance_of Whisper::Segment, segment
|
|
||||||
if index == 0
|
|
||||||
seg = segment
|
|
||||||
assert_equal 0, segment.start_time
|
|
||||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
|
|
||||||
end
|
|
||||||
index += 1
|
|
||||||
end
|
|
||||||
whisper.transcribe(AUDIO, params)
|
|
||||||
assert_equal 0, seg.start_time
|
|
||||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_on_new_segment_twice
|
|
||||||
params = Whisper::Params.new
|
|
||||||
seg = nil
|
|
||||||
params.on_new_segment do |segment|
|
|
||||||
seg = segment
|
|
||||||
return
|
|
||||||
end
|
|
||||||
params.on_new_segment do |segment|
|
|
||||||
assert_same seg, segment
|
|
||||||
return
|
|
||||||
end
|
|
||||||
whisper.transcribe(AUDIO, params)
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def whisper
|
|
||||||
self.class.whisper
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,127 +1,131 @@
|
|||||||
require_relative "helper"
|
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
||||||
require "stringio"
|
EXTDIR = File.join(TOPDIR, 'ext')
|
||||||
|
#$LIBDIR = File.join(TOPDIR, 'lib')
|
||||||
|
#$:.unshift(LIBDIR)
|
||||||
|
$:.unshift(EXTDIR)
|
||||||
|
|
||||||
# Exists to detect memory-related bug
|
require 'whisper'
|
||||||
Whisper.log_set ->(level, buffer, user_data) {}, nil
|
require 'test/unit'
|
||||||
|
|
||||||
class TestWhisper < TestBase
|
class TestWhisper < Test::Unit::TestCase
|
||||||
def setup
|
def setup
|
||||||
@params = Whisper::Params.new
|
@params = Whisper::Params.new
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_language
|
||||||
|
@params.language = "en"
|
||||||
|
assert_equal @params.language, "en"
|
||||||
|
@params.language = "auto"
|
||||||
|
assert_equal @params.language, "auto"
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_offset
|
||||||
|
@params.offset = 10_000
|
||||||
|
assert_equal @params.offset, 10_000
|
||||||
|
@params.offset = 0
|
||||||
|
assert_equal @params.offset, 0
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_duration
|
||||||
|
@params.duration = 60_000
|
||||||
|
assert_equal @params.duration, 60_000
|
||||||
|
@params.duration = 0
|
||||||
|
assert_equal @params.duration, 0
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_max_text_tokens
|
||||||
|
@params.max_text_tokens = 300
|
||||||
|
assert_equal @params.max_text_tokens, 300
|
||||||
|
@params.max_text_tokens = 0
|
||||||
|
assert_equal @params.max_text_tokens, 0
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_translate
|
||||||
|
@params.translate = true
|
||||||
|
assert @params.translate
|
||||||
|
@params.translate = false
|
||||||
|
assert !@params.translate
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_no_context
|
||||||
|
@params.no_context = true
|
||||||
|
assert @params.no_context
|
||||||
|
@params.no_context = false
|
||||||
|
assert !@params.no_context
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_single_segment
|
||||||
|
@params.single_segment = true
|
||||||
|
assert @params.single_segment
|
||||||
|
@params.single_segment = false
|
||||||
|
assert !@params.single_segment
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_print_special
|
||||||
|
@params.print_special = true
|
||||||
|
assert @params.print_special
|
||||||
|
@params.print_special = false
|
||||||
|
assert !@params.print_special
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_print_progress
|
||||||
|
@params.print_progress = true
|
||||||
|
assert @params.print_progress
|
||||||
|
@params.print_progress = false
|
||||||
|
assert !@params.print_progress
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_print_realtime
|
||||||
|
@params.print_realtime = true
|
||||||
|
assert @params.print_realtime
|
||||||
|
@params.print_realtime = false
|
||||||
|
assert !@params.print_realtime
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_print_timestamps
|
||||||
|
@params.print_timestamps = true
|
||||||
|
assert @params.print_timestamps
|
||||||
|
@params.print_timestamps = false
|
||||||
|
assert !@params.print_timestamps
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_suppress_blank
|
||||||
|
@params.suppress_blank = true
|
||||||
|
assert @params.suppress_blank
|
||||||
|
@params.suppress_blank = false
|
||||||
|
assert !@params.suppress_blank
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_suppress_non_speech_tokens
|
||||||
|
@params.suppress_non_speech_tokens = true
|
||||||
|
assert @params.suppress_non_speech_tokens
|
||||||
|
@params.suppress_non_speech_tokens = false
|
||||||
|
assert !@params.suppress_non_speech_tokens
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_token_timestamps
|
||||||
|
@params.token_timestamps = true
|
||||||
|
assert @params.token_timestamps
|
||||||
|
@params.token_timestamps = false
|
||||||
|
assert !@params.token_timestamps
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_split_on_word
|
||||||
|
@params.split_on_word = true
|
||||||
|
assert @params.split_on_word
|
||||||
|
@params.split_on_word = false
|
||||||
|
assert !@params.split_on_word
|
||||||
|
end
|
||||||
|
|
||||||
def test_whisper
|
def test_whisper
|
||||||
@whisper = Whisper::Context.new(MODEL)
|
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
|
||||||
params = Whisper::Params.new
|
params = Whisper::Params.new
|
||||||
params.print_timestamps = false
|
params.print_timestamps = false
|
||||||
|
|
||||||
@whisper.transcribe(AUDIO, params) {|text|
|
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
|
||||||
|
@whisper.transcribe(jfk, params) {|text|
|
||||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
sub_test_case "After transcription" do
|
|
||||||
class << self
|
|
||||||
attr_reader :whisper
|
|
||||||
|
|
||||||
def startup
|
|
||||||
@whisper = Whisper::Context.new(TestBase::MODEL)
|
|
||||||
params = Whisper::Params.new
|
|
||||||
params.print_timestamps = false
|
|
||||||
@whisper.transcribe(TestBase::AUDIO, params)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def whisper
|
|
||||||
self.class.whisper
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_full_n_segments
|
|
||||||
assert_equal 1, whisper.full_n_segments
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_full_lang_id
|
|
||||||
assert_equal 0, whisper.full_lang_id
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_full_get_segment_t0
|
|
||||||
assert_equal 0, whisper.full_get_segment_t0(0)
|
|
||||||
assert_raise IndexError do
|
|
||||||
whisper.full_get_segment_t0(whisper.full_n_segments)
|
|
||||||
end
|
|
||||||
assert_raise IndexError do
|
|
||||||
whisper.full_get_segment_t0(-1)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_full_get_segment_t1
|
|
||||||
t1 = whisper.full_get_segment_t1(0)
|
|
||||||
assert_kind_of Integer, t1
|
|
||||||
assert t1 > 0
|
|
||||||
assert_raise IndexError do
|
|
||||||
whisper.full_get_segment_t1(whisper.full_n_segments)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_full_get_segment_speaker_turn_next
|
|
||||||
assert_false whisper.full_get_segment_speaker_turn_next(0)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_full_get_segment_text
|
|
||||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_lang_max_id
|
|
||||||
assert_kind_of Integer, Whisper.lang_max_id
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_lang_id
|
|
||||||
assert_equal 0, Whisper.lang_id("en")
|
|
||||||
assert_raise ArgumentError do
|
|
||||||
Whisper.lang_id("non existing language")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_lang_str
|
|
||||||
assert_equal "en", Whisper.lang_str(0)
|
|
||||||
assert_raise IndexError do
|
|
||||||
Whisper.lang_str(Whisper.lang_max_id + 1)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_lang_str_full
|
|
||||||
assert_equal "english", Whisper.lang_str_full(0)
|
|
||||||
assert_raise IndexError do
|
|
||||||
Whisper.lang_str_full(Whisper.lang_max_id + 1)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_log_set
|
|
||||||
user_data = Object.new
|
|
||||||
logs = []
|
|
||||||
log_callback = ->(level, buffer, udata) {
|
|
||||||
logs << [level, buffer, udata]
|
|
||||||
}
|
|
||||||
Whisper.log_set log_callback, user_data
|
|
||||||
Whisper::Context.new(MODEL)
|
|
||||||
|
|
||||||
assert logs.length > 30
|
|
||||||
logs.each do |log|
|
|
||||||
assert_equal Whisper::LOG_LEVEL_INFO, log[0]
|
|
||||||
assert_same user_data, log[2]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_log_suppress
|
|
||||||
stderr = $stderr
|
|
||||||
Whisper.log_set ->(level, buffer, user_data) {
|
|
||||||
# do nothing
|
|
||||||
}, nil
|
|
||||||
dev = StringIO.new("")
|
|
||||||
$stderr = dev
|
|
||||||
Whisper::Context.new(MODEL)
|
|
||||||
assert_empty dev.string
|
|
||||||
ensure
|
|
||||||
$stderr = stderr
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
require "yaml"
|
|
||||||
|
|
||||||
Gem::Specification.new do |s|
|
Gem::Specification.new do |s|
|
||||||
s.name = "whispercpp"
|
s.name = "whispercpp"
|
||||||
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
||||||
@ -9,16 +7,10 @@ Gem::Specification.new do |s|
|
|||||||
s.email = 'todd.fisher@gmail.com'
|
s.email = 'todd.fisher@gmail.com'
|
||||||
s.extra_rdoc_files = ['LICENSE', 'README.md']
|
s.extra_rdoc_files = ['LICENSE', 'README.md']
|
||||||
|
|
||||||
s.files = `git ls-files . -z`.split("\x0") +
|
s.files = ["LICENSE", "README.md", "Rakefile", "ext/extconf.rb", "ext/ggml.c", "ext/ruby_whisper.cpp", "ext/whisper.cpp", "ext/dr_wav.h", "ext/ggml.h", "ext/ruby_whisper.h", "ext/whisper.h"]
|
||||||
YAML.load_file("extsources.yaml").collect {|file|
|
|
||||||
basename = File.basename(file)
|
|
||||||
if s.extra_rdoc_files.include?(basename)
|
|
||||||
basename
|
|
||||||
else
|
|
||||||
File.join("ext", basename)
|
|
||||||
end
|
|
||||||
}
|
|
||||||
|
|
||||||
|
#### Load-time details
|
||||||
|
s.require_paths = ['lib','ext']
|
||||||
s.summary = %q{Ruby whisper.cpp bindings}
|
s.summary = %q{Ruby whisper.cpp bindings}
|
||||||
s.test_files = ["tests/test_whisper.rb"]
|
s.test_files = ["tests/test_whisper.rb"]
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
set(WHISPER_VERSION @WHISPER_INSTALL_VERSION@)
|
set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)
|
||||||
set(WHISPER_BUILD_COMMIT @WHISPER_BUILD_COMMIT@)
|
set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
|
||||||
set(WHISPER_BUILD_NUMBER @WHISPER_BUILD_NUMBER@)
|
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
|
||||||
set(WHISPER_SHARED_LIB @BUILD_SHARED_LIBS@)
|
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
||||||
|
|
||||||
set(GGML_BLAS @GGML_BLAS@)
|
set(GGML_BLAS @GGML_BLAS@)
|
||||||
set(GGML_CUDA @GGML_CUDA@)
|
set(GGML_CUDA @GGML_CUDA@)
|
||||||
@ -11,9 +11,9 @@ set(GGML_ACCELERATE @GGML_ACCELERATE@)
|
|||||||
|
|
||||||
@PACKAGE_INIT@
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
set_and_check(WHISPER_INCLUDE_DIR "@PACKAGE_WHISPER_INCLUDE_INSTALL_DIR@")
|
set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
|
||||||
set_and_check(WHISPER_LIB_DIR "@PACKAGE_WHISPER_LIB_INSTALL_DIR@")
|
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
|
||||||
set_and_check(WHISPER_BIN_DIR "@PACKAGE_WHISPER_BIN_INSTALL_DIR@")
|
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
|
||||||
|
|
||||||
# Ensure transient dependencies satisfied
|
# Ensure transient dependencies satisfied
|
||||||
|
|
||||||
@ -43,23 +43,23 @@ if (GGML_HIPBLAS)
|
|||||||
find_package(rocblas REQUIRED)
|
find_package(rocblas REQUIRED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
find_library(whisper_LIBRARY whisper
|
find_library(llama_LIBRARY llama
|
||||||
REQUIRED
|
REQUIRED
|
||||||
HINTS ${WHISPER_LIB_DIR})
|
HINTS ${LLAMA_LIB_DIR})
|
||||||
|
|
||||||
set(_whisper_link_deps "Threads::Threads" "@WHISPER_EXTRA_LIBS@")
|
set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
|
||||||
set(_whisper_transient_defines "@WHISPER_TRANSIENT_DEFINES@")
|
set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")
|
||||||
|
|
||||||
add_library(whisper UNKNOWN IMPORTED)
|
add_library(llama UNKNOWN IMPORTED)
|
||||||
|
|
||||||
set_target_properties(whisper
|
set_target_properties(llama
|
||||||
PROPERTIES
|
PROPERTIES
|
||||||
INTERFACE_INCLUDE_DIRECTORIES "${WHISPER_INCLUDE_DIR}"
|
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
||||||
INTERFACE_LINK_LIBRARIES "${_whisper_link_deps}"
|
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
|
||||||
INTERFACE_COMPILE_DEFINITIONS "${_whisper_transient_defines}"
|
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||||
IMPORTED_LOCATION "${whisper_LIBRARY}"
|
IMPORTED_LOCATION "${llama_LIBRARY}"
|
||||||
INTERFACE_COMPILE_FEATURES cxx_std_11
|
INTERFACE_COMPILE_FEATURES cxx_std_11
|
||||||
POSITION_INDEPENDENT_CODE ON )
|
POSITION_INDEPENDENT_CODE ON )
|
||||||
|
|
||||||
check_required_components(whisper)
|
check_required_components(Llama)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
prefix=@CMAKE_INSTALL_PREFIX@
|
prefix=@CMAKE_INSTALL_PREFIX@
|
||||||
exec_prefix=${prefix}
|
exec_prefix=${prefix}
|
||||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
libdir=${exec_prefix}/lib
|
||||||
includedir=${prefix}/include
|
includedir=${prefix}/include
|
||||||
|
|
||||||
Name: whisper
|
Name: whisper
|
||||||
|
@ -102,8 +102,8 @@ if (EMSCRIPTEN)
|
|||||||
set_target_properties(libstream PROPERTIES FOLDER "libs")
|
set_target_properties(libstream PROPERTIES FOLDER "libs")
|
||||||
add_subdirectory(command.wasm)
|
add_subdirectory(command.wasm)
|
||||||
set_target_properties(libcommand PROPERTIES FOLDER "libs")
|
set_target_properties(libcommand PROPERTIES FOLDER "libs")
|
||||||
#add_subdirectory(talk.wasm)
|
add_subdirectory(talk.wasm)
|
||||||
#set_target_properties(libtalk PROPERTIES FOLDER "libs")
|
set_target_properties(libtalk PROPERTIES FOLDER "libs")
|
||||||
add_subdirectory(bench.wasm)
|
add_subdirectory(bench.wasm)
|
||||||
set_target_properties(libbench PROPERTIES FOLDER "libs")
|
set_target_properties(libbench PROPERTIES FOLDER "libs")
|
||||||
elseif(CMAKE_JS_VERSION)
|
elseif(CMAKE_JS_VERSION)
|
||||||
@ -127,17 +127,15 @@ endif (WHISPER_SDL2)
|
|||||||
add_subdirectory(quantize)
|
add_subdirectory(quantize)
|
||||||
set_target_properties(quantize PROPERTIES FOLDER "examples")
|
set_target_properties(quantize PROPERTIES FOLDER "examples")
|
||||||
if (WHISPER_SDL2)
|
if (WHISPER_SDL2)
|
||||||
# TODO: disabled until update
|
add_subdirectory(talk)
|
||||||
# https://github.com/ggerganov/whisper.cpp/issues/1818
|
set_target_properties(talk PROPERTIES FOLDER "examples")
|
||||||
#add_subdirectory(talk)
|
|
||||||
#set_target_properties(talk PROPERTIES FOLDER "examples")
|
|
||||||
add_subdirectory(talk-llama)
|
add_subdirectory(talk-llama)
|
||||||
set_target_properties(talk-llama PROPERTIES FOLDER "examples")
|
set_target_properties(talk-llama PROPERTIES FOLDER "examples")
|
||||||
add_subdirectory(lsp)
|
add_subdirectory(lsp)
|
||||||
set_target_properties(lsp PROPERTIES FOLDER "examples")
|
set_target_properties(lsp PROPERTIES FOLDER "examples")
|
||||||
if (GGML_SYCL)
|
if (GGML_SYCL)
|
||||||
add_subdirectory(sycl)
|
add_subdirectory(sycl)
|
||||||
set_target_properties(ls-sycl-device PROPERTIES FOLDER "examples")
|
set_target_properties(sycl PROPERTIES FOLDER "examples")
|
||||||
endif()
|
endif()
|
||||||
endif (WHISPER_SDL2)
|
endif (WHISPER_SDL2)
|
||||||
endif()
|
endif()
|
||||||
|
@ -72,9 +72,6 @@ bool ggml_common_quantize_0(
|
|||||||
case GGML_FTYPE_MOSTLY_IQ4_XS:
|
case GGML_FTYPE_MOSTLY_IQ4_XS:
|
||||||
case GGML_FTYPE_MOSTLY_IQ1_M:
|
case GGML_FTYPE_MOSTLY_IQ1_M:
|
||||||
case GGML_FTYPE_MOSTLY_BF16:
|
case GGML_FTYPE_MOSTLY_BF16:
|
||||||
case GGML_FTYPE_MOSTLY_Q4_0_4_4:
|
|
||||||
case GGML_FTYPE_MOSTLY_Q4_0_4_8:
|
|
||||||
case GGML_FTYPE_MOSTLY_Q4_0_8_8:
|
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
|
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
|
||||||
return false;
|
return false;
|
||||||
@ -212,11 +209,6 @@ bool ggml_common_quantize_0(
|
|||||||
case GGML_TYPE_IQ4_XS:
|
case GGML_TYPE_IQ4_XS:
|
||||||
case GGML_TYPE_IQ1_M:
|
case GGML_TYPE_IQ1_M:
|
||||||
case GGML_TYPE_BF16:
|
case GGML_TYPE_BF16:
|
||||||
case GGML_TYPE_Q4_0_4_4:
|
|
||||||
case GGML_TYPE_Q4_0_4_8:
|
|
||||||
case GGML_TYPE_Q4_0_8_8:
|
|
||||||
case GGML_TYPE_TQ1_0:
|
|
||||||
case GGML_TYPE_TQ2_0:
|
|
||||||
case GGML_TYPE_COUNT:
|
case GGML_TYPE_COUNT:
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
|
fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
|
||||||
|
@ -147,6 +147,7 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
|
|||||||
case 7: return "He";
|
case 7: return "He";
|
||||||
case 8: return "She";
|
case 8: return "She";
|
||||||
case 9: return "They";
|
case 9: return "They";
|
||||||
|
default: return "To";
|
||||||
}
|
}
|
||||||
|
|
||||||
return "The";
|
return "The";
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
#define COMMON_SAMPLE_RATE 16000
|
#define COMMON_SAMPLE_RATE 16000
|
||||||
|
|
||||||
@ -287,43 +286,12 @@ void sam_print_usage(int argc, char ** argv, const sam_params & params);
|
|||||||
// Terminal utils
|
// Terminal utils
|
||||||
//
|
//
|
||||||
|
|
||||||
#define SQR(X) ((X) * (X))
|
|
||||||
#define UNCUBE(x) x < 48 ? 0 : x < 115 ? 1 : (x - 35) / 40
|
|
||||||
|
|
||||||
/**
|
// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
|
||||||
* Quantizes 24-bit RGB to xterm256 code range [16,256).
|
// Lowest is red, middle is yellow, highest is green.
|
||||||
*/
|
|
||||||
static int rgb2xterm256(int r, int g, int b) {
|
|
||||||
unsigned char cube[] = {0, 0137, 0207, 0257, 0327, 0377};
|
|
||||||
int av, ir, ig, ib, il, qr, qg, qb, ql;
|
|
||||||
av = r * .299 + g * .587 + b * .114 + .5;
|
|
||||||
ql = (il = av > 238 ? 23 : (av - 3) / 10) * 10 + 8;
|
|
||||||
qr = cube[(ir = UNCUBE(r))];
|
|
||||||
qg = cube[(ig = UNCUBE(g))];
|
|
||||||
qb = cube[(ib = UNCUBE(b))];
|
|
||||||
if (SQR(qr - r) + SQR(qg - g) + SQR(qb - b) <=
|
|
||||||
SQR(ql - r) + SQR(ql - g) + SQR(ql - b))
|
|
||||||
return ir * 36 + ig * 6 + ib + 020;
|
|
||||||
return il + 0350;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string set_xterm256_foreground(int r, int g, int b) {
|
|
||||||
int x = rgb2xterm256(r, g, b);
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << "\033[38;5;" << x << "m";
|
|
||||||
return oss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lowest is red, middle is yellow, highest is green. Color scheme from
|
|
||||||
// Paul Tol; it is colorblind friendly https://personal.sron.nl/~pault/
|
|
||||||
const std::vector<std::string> k_colors = {
|
const std::vector<std::string> k_colors = {
|
||||||
set_xterm256_foreground(220, 5, 12),
|
"\033[38;5;196m", "\033[38;5;202m", "\033[38;5;208m", "\033[38;5;214m", "\033[38;5;220m",
|
||||||
set_xterm256_foreground(232, 96, 28),
|
"\033[38;5;226m", "\033[38;5;190m", "\033[38;5;154m", "\033[38;5;118m", "\033[38;5;82m",
|
||||||
set_xterm256_foreground(241, 147, 45),
|
|
||||||
set_xterm256_foreground(246, 193, 65),
|
|
||||||
set_xterm256_foreground(247, 240, 86),
|
|
||||||
set_xterm256_foreground(144, 201, 135),
|
|
||||||
set_xterm256_foreground( 78, 178, 101),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
//
|
||||||
|
4303
examples/dr_wav.h
4303
examples/dr_wav.h
File diff suppressed because it is too large
Load Diff
@ -204,6 +204,8 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
|
|||||||
const size_t errbuffsize = 1024;
|
const size_t errbuffsize = 1024;
|
||||||
char errbuff[errbuffsize];
|
char errbuff[errbuffsize];
|
||||||
|
|
||||||
|
av_register_all(); // from avformat. Still a must-have call for ffmpeg v3! (can be skipped for later versions)
|
||||||
|
|
||||||
fmt_ctx = avformat_alloc_context();
|
fmt_ctx = avformat_alloc_context();
|
||||||
avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ);
|
avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ);
|
||||||
LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ);
|
LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ);
|
||||||
@ -319,7 +321,7 @@ int ffmpeg_decode_audio(const std::string &ifname, std::vector<uint8_t>& owav_da
|
|||||||
LOG("Couldn't map input file %s\n", ifname.c_str());
|
LOG("Couldn't map input file %s\n", ifname.c_str());
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
LOG("Mapped input file: %s size: %d\n", ibuf, (int) ibuf_size);
|
LOG("Mapped input file: %x size: %d\n", ibuf, ibuf_size);
|
||||||
struct audio_buffer inaudio_buf;
|
struct audio_buffer inaudio_buf;
|
||||||
inaudio_buf.ptr = ibuf;
|
inaudio_buf.ptr = ibuf;
|
||||||
inaudio_buf.size = ibuf_size;
|
inaudio_buf.size = ibuf_size;
|
||||||
|
@ -48,7 +48,7 @@ if [ -n "$3" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Whisper models
|
# Whisper models
|
||||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
|
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
|
||||||
|
|
||||||
# list available models
|
# list available models
|
||||||
function list_models {
|
function list_models {
|
||||||
|
@ -997,7 +997,6 @@ int main(int argc, char ** argv) {
|
|||||||
if (params.dtw == "large.v1") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V1;
|
if (params.dtw == "large.v1") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V1;
|
||||||
if (params.dtw == "large.v2") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V2;
|
if (params.dtw == "large.v2") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V2;
|
||||||
if (params.dtw == "large.v3") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3;
|
if (params.dtw == "large.v3") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3;
|
||||||
if (params.dtw == "large.v3.turbo") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3_TURBO;
|
|
||||||
|
|
||||||
if (cparams.dtw_aheads_preset == WHISPER_AHEADS_NONE) {
|
if (cparams.dtw_aheads_preset == WHISPER_AHEADS_NONE) {
|
||||||
fprintf(stderr, "error: unknown DTW preset '%s'\n", params.dtw.c_str());
|
fprintf(stderr, "error: unknown DTW preset '%s'\n", params.dtw.c_str());
|
||||||
|
@ -21,7 +21,7 @@ def process_audio(wav_file, model_name="base.en"):
|
|||||||
if not os.path.exists(wav_file):
|
if not os.path.exists(wav_file):
|
||||||
raise FileNotFoundError(f"WAV file not found: {wav_file}")
|
raise FileNotFoundError(f"WAV file not found: {wav_file}")
|
||||||
|
|
||||||
full_command = f"./main -m {model} -f {wav_file} -nt"
|
full_command = f"./main -m {model} -f {wav_file} -np -nt"
|
||||||
|
|
||||||
# Execute the command
|
# Execute the command
|
||||||
process = subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
process = subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
@ -34,7 +34,6 @@ struct server_params
|
|||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
std::string public_path = "examples/server/public";
|
std::string public_path = "examples/server/public";
|
||||||
std::string request_path = "";
|
std::string request_path = "";
|
||||||
std::string inference_path = "/inference";
|
|
||||||
|
|
||||||
int32_t port = 8080;
|
int32_t port = 8080;
|
||||||
int32_t read_timeout = 600;
|
int32_t read_timeout = 600;
|
||||||
@ -133,7 +132,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
|
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
|
||||||
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
|
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
|
||||||
fprintf(stderr, " --request-path PATH, [%-7s] Request path for all requests\n", sparams.request_path.c_str());
|
fprintf(stderr, " --request-path PATH, [%-7s] Request path for all requests\n", sparams.request_path.c_str());
|
||||||
fprintf(stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n", sparams.inference_path.c_str());
|
|
||||||
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
|
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
@ -184,7 +182,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
|
|||||||
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
|
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
|
||||||
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
|
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
|
||||||
else if ( arg == "--request-path") { sparams.request_path = argv[++i]; }
|
else if ( arg == "--request-path") { sparams.request_path = argv[++i]; }
|
||||||
else if ( arg == "--inference-path") { sparams.inference_path = argv[++i]; }
|
|
||||||
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
|
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
|
||||||
else {
|
else {
|
||||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||||
@ -219,7 +216,7 @@ void check_ffmpeg_availibility() {
|
|||||||
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
|
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
|
||||||
std::ostringstream cmd_stream;
|
std::ostringstream cmd_stream;
|
||||||
std::string converted_filename_temp = temp_filename + "_temp.wav";
|
std::string converted_filename_temp = temp_filename + "_temp.wav";
|
||||||
cmd_stream << "ffmpeg -i \"" << temp_filename << "\" -y -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
|
cmd_stream << "ffmpeg -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
|
||||||
std::string cmd = cmd_stream.str();
|
std::string cmd = cmd_stream.str();
|
||||||
|
|
||||||
int status = std::system(cmd.c_str());
|
int status = std::system(cmd.c_str());
|
||||||
@ -647,10 +644,10 @@ int main(int argc, char ** argv) {
|
|||||||
return false;
|
return false;
|
||||||
});
|
});
|
||||||
|
|
||||||
svr.Options(sparams.request_path + sparams.inference_path, [&](const Request &, Response &){
|
svr.Options(sparams.request_path + "/inference", [&](const Request &, Response &){
|
||||||
});
|
});
|
||||||
|
|
||||||
svr.Post(sparams.request_path + sparams.inference_path, [&](const Request &req, Response &res){
|
svr.Post(sparams.request_path + "/inference", [&](const Request &req, Response &res){
|
||||||
// acquire whisper model mutex lock
|
// acquire whisper model mutex lock
|
||||||
std::lock_guard<std::mutex> lock(whisper_mutex);
|
std::lock_guard<std::mutex> lock(whisper_mutex);
|
||||||
|
|
||||||
@ -677,8 +674,7 @@ int main(int argc, char ** argv) {
|
|||||||
if (sparams.ffmpeg_converter) {
|
if (sparams.ffmpeg_converter) {
|
||||||
// if file is not wav, convert to wav
|
// if file is not wav, convert to wav
|
||||||
// write to temporary file
|
// write to temporary file
|
||||||
const std::string temp_filename_base = std::tmpnam(nullptr);
|
const std::string temp_filename = "whisper_server_temp_file.wav";
|
||||||
const std::string temp_filename = temp_filename_base + ".wav";
|
|
||||||
std::ofstream temp_file{temp_filename, std::ios::binary};
|
std::ofstream temp_file{temp_filename, std::ios::binary};
|
||||||
temp_file << audio_file.content;
|
temp_file << audio_file.content;
|
||||||
temp_file.close();
|
temp_file.close();
|
||||||
|
@ -5,5 +5,5 @@
|
|||||||
set(TARGET ls-sycl-device)
|
set(TARGET ls-sycl-device)
|
||||||
add_executable(${TARGET} ls-sycl-device.cpp)
|
add_executable(${TARGET} ls-sycl-device.cpp)
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
@ -7,13 +7,10 @@ cd build
|
|||||||
source /opt/intel/oneapi/setvars.sh
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
#for FP16
|
#for FP16
|
||||||
#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON # faster for long-prompt inference
|
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON # faster for long-prompt inference
|
||||||
|
|
||||||
#for FP32
|
#for FP32
|
||||||
cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
#for other features from the examples, e.g. stream and talk link with SDL2:
|
|
||||||
#cmake .. -DGGML_SYCL=ON -DWHISPER_SDL2=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
|
||||||
|
|
||||||
#build example/main only
|
#build example/main only
|
||||||
#cmake --build . --config Release --target main
|
#cmake --build . --config Release --target main
|
||||||
|
@ -1,13 +1,7 @@
|
|||||||
if (WHISPER_SDL2)
|
if (WHISPER_SDL2)
|
||||||
# talk-llama
|
# talk-llama
|
||||||
set(TARGET talk-llama)
|
set(TARGET talk-llama)
|
||||||
add_executable(${TARGET} talk-llama.cpp
|
add_executable(${TARGET} talk-llama.cpp llama.cpp unicode.cpp unicode-data.cpp)
|
||||||
llama.cpp
|
|
||||||
llama-vocab.cpp
|
|
||||||
llama-grammar.cpp
|
|
||||||
llama-sampling.cpp
|
|
||||||
unicode.cpp
|
|
||||||
unicode-data.cpp)
|
|
||||||
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
|
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
|
||||||
|
|
||||||
if (WHISPER_CLBLAST)
|
if (WHISPER_CLBLAST)
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,144 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "llama-impl.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
struct llama_vocab;
|
|
||||||
|
|
||||||
// grammar element type
|
|
||||||
enum llama_gretype {
|
|
||||||
// end of rule definition
|
|
||||||
LLAMA_GRETYPE_END = 0,
|
|
||||||
|
|
||||||
// start of alternate definition for rule
|
|
||||||
LLAMA_GRETYPE_ALT = 1,
|
|
||||||
|
|
||||||
// non-terminal element: reference to rule
|
|
||||||
LLAMA_GRETYPE_RULE_REF = 2,
|
|
||||||
|
|
||||||
// terminal element: character (code point)
|
|
||||||
LLAMA_GRETYPE_CHAR = 3,
|
|
||||||
|
|
||||||
// inverse char(s) ([^a], [^a-b] [^abc])
|
|
||||||
LLAMA_GRETYPE_CHAR_NOT = 4,
|
|
||||||
|
|
||||||
// modifies a preceding LLAMA_GRETYPE_CHAR or LLAMA_GRETYPE_CHAR_ALT to
|
|
||||||
// be an inclusive range ([a-z])
|
|
||||||
LLAMA_GRETYPE_CHAR_RNG_UPPER = 5,
|
|
||||||
|
|
||||||
// modifies a preceding LLAMA_GRETYPE_CHAR or
|
|
||||||
// LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
|
|
||||||
LLAMA_GRETYPE_CHAR_ALT = 6,
|
|
||||||
|
|
||||||
// any character (.)
|
|
||||||
LLAMA_GRETYPE_CHAR_ANY = 7,
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct llama_grammar_element {
|
|
||||||
enum llama_gretype type;
|
|
||||||
uint32_t value; // Unicode code point or rule ID
|
|
||||||
} llama_grammar_element;
|
|
||||||
|
|
||||||
struct llama_partial_utf8 {
|
|
||||||
uint32_t value; // bit value so far (unshifted)
|
|
||||||
int n_remain; // num bytes remaining; -1 indicates invalid sequence
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_grammar_candidate {
|
|
||||||
size_t index;
|
|
||||||
const uint32_t * code_points;
|
|
||||||
llama_partial_utf8 partial_utf8;
|
|
||||||
};
|
|
||||||
|
|
||||||
using llama_grammar_rule = std::vector< llama_grammar_element>;
|
|
||||||
using llama_grammar_stack = std::vector<const llama_grammar_element *>;
|
|
||||||
|
|
||||||
using llama_grammar_rules = std::vector<llama_grammar_rule>;
|
|
||||||
using llama_grammar_stacks = std::vector<llama_grammar_stack>;
|
|
||||||
using llama_grammar_candidates = std::vector<llama_grammar_candidate>;
|
|
||||||
|
|
||||||
const llama_grammar_rules & llama_grammar_get_rules (const struct llama_grammar * grammar);
|
|
||||||
llama_grammar_stacks & llama_grammar_get_stacks( struct llama_grammar * grammar);
|
|
||||||
|
|
||||||
// takes a set of possible pushdown stacks on a grammar, which are required to
|
|
||||||
// be positioned at a character range (see `llama_grammar_advance_stack`), and
|
|
||||||
// produces the N possible stacks if the given char is accepted at those
|
|
||||||
// positions
|
|
||||||
void llama_grammar_accept(
|
|
||||||
const llama_grammar_rules & rules,
|
|
||||||
const llama_grammar_stacks & stacks,
|
|
||||||
uint32_t chr,
|
|
||||||
llama_grammar_stacks & stacks_new);
|
|
||||||
|
|
||||||
std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
|
|
||||||
const llama_grammar_rules & rules,
|
|
||||||
const llama_grammar_stack & stack,
|
|
||||||
const llama_grammar_candidates & candidates);
|
|
||||||
|
|
||||||
struct llama_grammar_parser {
|
|
||||||
std::map<std::string, uint32_t> symbol_ids;
|
|
||||||
|
|
||||||
llama_grammar_rules rules;
|
|
||||||
|
|
||||||
llama_grammar_stack c_rules() const;
|
|
||||||
|
|
||||||
uint32_t get_symbol_id(const char * src, size_t len);
|
|
||||||
uint32_t generate_symbol_id(const std::string & base_name);
|
|
||||||
|
|
||||||
void add_rule(uint32_t rule_id, const llama_grammar_rule & rule);
|
|
||||||
|
|
||||||
const char * parse_alternates(
|
|
||||||
const char * src,
|
|
||||||
const std::string & rule_name,
|
|
||||||
uint32_t rule_id,
|
|
||||||
bool is_nested);
|
|
||||||
|
|
||||||
const char * parse_sequence(
|
|
||||||
const char * src,
|
|
||||||
const std::string & rule_name,
|
|
||||||
llama_grammar_rule & rule,
|
|
||||||
bool is_nested);
|
|
||||||
|
|
||||||
const char * parse_rule(const char * src);
|
|
||||||
|
|
||||||
bool parse(const char * src);
|
|
||||||
void print(FILE * file);
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_grammar {
|
|
||||||
// note: allow null vocab for testing (not great)
|
|
||||||
const llama_vocab * vocab;
|
|
||||||
|
|
||||||
const llama_grammar_rules rules; // TODO: shared ptr
|
|
||||||
llama_grammar_stacks stacks;
|
|
||||||
|
|
||||||
// buffer for partially generated UTF-8 sequence from accepted tokens
|
|
||||||
llama_partial_utf8 partial_utf8;
|
|
||||||
};
|
|
||||||
|
|
||||||
//
|
|
||||||
// internal API
|
|
||||||
//
|
|
||||||
|
|
||||||
// note: needed for tests (not great)
|
|
||||||
struct llama_grammar * llama_grammar_init_impl(
|
|
||||||
const struct llama_vocab * vocab,
|
|
||||||
const llama_grammar_element ** rules,
|
|
||||||
size_t n_rules,
|
|
||||||
size_t start_rule_index);
|
|
||||||
|
|
||||||
struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root);
|
|
||||||
|
|
||||||
void llama_grammar_free_impl(struct llama_grammar * grammar);
|
|
||||||
|
|
||||||
struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar);
|
|
||||||
|
|
||||||
// TODO: move the API below as member functions of llama_grammar
|
|
||||||
void llama_grammar_apply_impl(
|
|
||||||
const struct llama_grammar & grammar,
|
|
||||||
llama_token_data_array * cur_p);
|
|
||||||
|
|
||||||
void llama_grammar_accept_impl(
|
|
||||||
struct llama_grammar & grammar,
|
|
||||||
llama_token token);
|
|
@ -1,181 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "llama.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
#ifdef __GNUC__
|
|
||||||
#ifdef __MINGW32__
|
|
||||||
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
|
||||||
#else
|
|
||||||
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define LLAMA_ATTRIBUTE_FORMAT(...)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//
|
|
||||||
// logging
|
|
||||||
//
|
|
||||||
|
|
||||||
LLAMA_ATTRIBUTE_FORMAT(2, 3)
|
|
||||||
void llama_log_internal (ggml_log_level level, const char * format, ...);
|
|
||||||
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
|
|
||||||
|
|
||||||
#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
|
||||||
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
|
||||||
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
|
|
||||||
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
|
|
||||||
#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
|
|
||||||
#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
|
|
||||||
|
|
||||||
//
|
|
||||||
// helpers
|
|
||||||
//
|
|
||||||
|
|
||||||
struct time_meas {
|
|
||||||
time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
|
|
||||||
|
|
||||||
~time_meas() {
|
|
||||||
if (t_start_us >= 0) {
|
|
||||||
t_acc += ggml_time_us() - t_start_us;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int64_t t_start_us;
|
|
||||||
|
|
||||||
int64_t & t_acc;
|
|
||||||
};
|
|
||||||
|
|
||||||
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
|
||||||
if (search.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
std::string builder;
|
|
||||||
builder.reserve(s.length());
|
|
||||||
size_t pos = 0;
|
|
||||||
size_t last_pos = 0;
|
|
||||||
while ((pos = s.find(search, last_pos)) != std::string::npos) {
|
|
||||||
builder.append(s, last_pos, pos - last_pos);
|
|
||||||
builder.append(replace);
|
|
||||||
last_pos = pos + search.length();
|
|
||||||
}
|
|
||||||
builder.append(s, last_pos, std::string::npos);
|
|
||||||
s = std::move(builder);
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
|
|
||||||
struct llama_context * ctx
|
|
||||||
);
|
|
||||||
|
|
||||||
// the ring buffer works similarly to std::deque, but with a fixed capacity
|
|
||||||
template<typename T>
|
|
||||||
struct ring_buffer {
|
|
||||||
ring_buffer(size_t cap) : capacity(cap), data(cap) {}
|
|
||||||
|
|
||||||
T & front() {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[first];
|
|
||||||
}
|
|
||||||
|
|
||||||
const T & front() const {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[first];
|
|
||||||
}
|
|
||||||
|
|
||||||
T & back() {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
const T & back() const {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
void push_back(const T & value) {
|
|
||||||
if (capacity == 0) {
|
|
||||||
throw std::runtime_error("ring buffer: capacity is zero");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sz == capacity) {
|
|
||||||
// advance the start when buffer is full
|
|
||||||
first = (first + 1) % capacity;
|
|
||||||
} else {
|
|
||||||
sz++;
|
|
||||||
}
|
|
||||||
data[pos] = value;
|
|
||||||
pos = (pos + 1) % capacity;
|
|
||||||
}
|
|
||||||
|
|
||||||
T pop_front() {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
T value = data[first];
|
|
||||||
first = (first + 1) % capacity;
|
|
||||||
sz--;
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
//T & operator[](size_t i) {
|
|
||||||
// if (i >= sz) {
|
|
||||||
// throw std::runtime_error("ring buffer: index out of bounds");
|
|
||||||
// }
|
|
||||||
// return data[(first + i) % capacity];
|
|
||||||
//}
|
|
||||||
|
|
||||||
//const T & at(size_t i) const {
|
|
||||||
// if (i >= sz) {
|
|
||||||
// throw std::runtime_error("ring buffer: index out of bounds");
|
|
||||||
// }
|
|
||||||
// return data[(first + i) % capacity];
|
|
||||||
//}
|
|
||||||
|
|
||||||
const T & rat(size_t i) const {
|
|
||||||
if (i >= sz) {
|
|
||||||
throw std::runtime_error("ring buffer: index out of bounds");
|
|
||||||
}
|
|
||||||
return data[(first + sz - i - 1) % capacity];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<T> to_vector() const {
|
|
||||||
std::vector<T> result;
|
|
||||||
result.reserve(sz);
|
|
||||||
for (size_t i = 0; i < sz; i++) {
|
|
||||||
result.push_back(data[(first + i) % capacity]);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear() {
|
|
||||||
// here only reset the status of the buffer
|
|
||||||
sz = 0;
|
|
||||||
first = 0;
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool empty() const {
|
|
||||||
return sz == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const {
|
|
||||||
return sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t capacity = 0;
|
|
||||||
size_t sz = 0;
|
|
||||||
size_t first = 0;
|
|
||||||
size_t pos = 0;
|
|
||||||
std::vector<T> data;
|
|
||||||
};
|
|
File diff suppressed because it is too large
Load Diff
@ -1,48 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
// TODO: rename llama-sampling.h/.cpp to llama-sampler.h/.cpp ?
|
|
||||||
|
|
||||||
#include "llama-grammar.h"
|
|
||||||
|
|
||||||
struct llama_vocab;
|
|
||||||
struct llama_grammar;
|
|
||||||
|
|
||||||
// sampler chain
|
|
||||||
|
|
||||||
struct llama_sampler_chain {
|
|
||||||
llama_sampler_chain_params params;
|
|
||||||
|
|
||||||
std::vector<struct llama_sampler *> samplers;
|
|
||||||
|
|
||||||
// timing
|
|
||||||
|
|
||||||
mutable int64_t t_sample_us;
|
|
||||||
|
|
||||||
mutable int32_t n_sample;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_sampler * llama_sampler_init_grammar_impl(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
const char * grammar_str,
|
|
||||||
const char * grammar_root);
|
|
||||||
|
|
||||||
struct llama_sampler * llama_sampler_init_infill_impl(
|
|
||||||
const struct llama_vocab & vocab);
|
|
||||||
|
|
||||||
struct llama_sampler * llama_sampler_init_dry_impl(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
int32_t context_size,
|
|
||||||
float dry_multiplier,
|
|
||||||
float dry_base,
|
|
||||||
int32_t dry_allowed_length,
|
|
||||||
int32_t dry_penalty_last_n,
|
|
||||||
const char ** seq_breakers,
|
|
||||||
size_t num_breakers);
|
|
||||||
|
|
||||||
struct llama_sampler * llama_sampler_init_dry_testing(
|
|
||||||
int32_t context_size,
|
|
||||||
float dry_multiplier,
|
|
||||||
float dry_base,
|
|
||||||
int32_t dry_allowed_length,
|
|
||||||
int32_t dry_penalty_last_n,
|
|
||||||
const std::vector<std::vector<llama_token>>& seq_breakers);
|
|
File diff suppressed because it is too large
Load Diff
@ -1,170 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "llama-impl.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <map>
|
|
||||||
#include <set>
|
|
||||||
|
|
||||||
struct llm_tokenizer;
|
|
||||||
|
|
||||||
struct llama_vocab {
|
|
||||||
using id = llama_token;
|
|
||||||
using token = std::string;
|
|
||||||
using tattr = llama_token_attr;
|
|
||||||
|
|
||||||
struct token_data {
|
|
||||||
token text;
|
|
||||||
float score;
|
|
||||||
tattr attr;
|
|
||||||
};
|
|
||||||
|
|
||||||
uint32_t n_vocab = 0; // TODO: not great because has to keep in sync with hparams.n_vocab
|
|
||||||
|
|
||||||
enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
|
|
||||||
enum llama_vocab_pre_type type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
|
||||||
|
|
||||||
int max_token_len = 0; // used for optimizing longest token search
|
|
||||||
|
|
||||||
std::unordered_map<token, id> token_to_id;
|
|
||||||
std::vector<token_data> id_to_token;
|
|
||||||
|
|
||||||
std::vector<id> cache_special_tokens;
|
|
||||||
std::vector<token> cache_token_to_piece; // llama_token_to_piece(special = true);
|
|
||||||
|
|
||||||
std::map<std::pair<std::string, std::string>, int> bpe_ranks;
|
|
||||||
|
|
||||||
// default LLaMA special tokens
|
|
||||||
// TODO: should we set all of these to LLAMA_TOKEN_NULL?
|
|
||||||
id special_bos_id = 1;
|
|
||||||
id special_eos_id = 2;
|
|
||||||
id special_eot_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_eom_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_unk_id = 0;
|
|
||||||
id special_sep_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_pad_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_cls_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_mask_id = LLAMA_TOKEN_NULL;
|
|
||||||
|
|
||||||
id linefeed_id = 13;
|
|
||||||
|
|
||||||
// fim tokens
|
|
||||||
id special_fim_pre_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_fim_suf_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_fim_mid_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_fim_pad_id = LLAMA_TOKEN_NULL;
|
|
||||||
id special_fim_rep_id = LLAMA_TOKEN_NULL; // repo
|
|
||||||
id special_fim_sep_id = LLAMA_TOKEN_NULL; // file separator
|
|
||||||
|
|
||||||
// set of all tokens that cause "end of generation"
|
|
||||||
std::set<id> special_eog_ids;
|
|
||||||
|
|
||||||
// tokenizer flags
|
|
||||||
bool tokenizer_add_space_prefix = false;
|
|
||||||
bool tokenizer_add_bos = false;
|
|
||||||
bool tokenizer_add_eos = false;
|
|
||||||
bool tokenizer_ignore_merges = false;
|
|
||||||
bool tokenizer_clean_spaces = false; // clean_up_tokenization_spaces
|
|
||||||
bool tokenizer_remove_extra_whitespaces = false;
|
|
||||||
bool tokenizer_escape_whitespaces = true;
|
|
||||||
bool tokenizer_treat_whitespace_as_suffix = false;
|
|
||||||
|
|
||||||
std::vector<char> precompiled_charsmap;
|
|
||||||
|
|
||||||
llm_tokenizer * tokenizer = nullptr;
|
|
||||||
|
|
||||||
llama_vocab() = default;
|
|
||||||
~llama_vocab();
|
|
||||||
|
|
||||||
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
|
|
||||||
|
|
||||||
void init_tokenizer();
|
|
||||||
};
|
|
||||||
|
|
||||||
//
|
|
||||||
// internal API
|
|
||||||
//
|
|
||||||
|
|
||||||
// TODO: rename to llama_tokenize_impl
|
|
||||||
// TODO: This should probably be in llama.h
|
|
||||||
std::vector<llama_vocab::id> llama_tokenize_internal(
|
|
||||||
const llama_vocab & vocab,
|
|
||||||
std::string raw_text,
|
|
||||||
bool add_special,
|
|
||||||
bool parse_special = false);
|
|
||||||
|
|
||||||
// TODO: move the API below as member functions of llama_vocab
|
|
||||||
llama_token llama_byte_to_token_impl(const llama_vocab & vocab, uint8_t ch);
|
|
||||||
|
|
||||||
const char * llama_token_get_text_impl(const struct llama_vocab & vocab, llama_token token);
|
|
||||||
|
|
||||||
float llama_token_get_score_impl(const struct llama_vocab & vocab, llama_token token);
|
|
||||||
|
|
||||||
llama_token_attr llama_token_get_attr_impl(const struct llama_vocab & vocab, llama_token token);
|
|
||||||
|
|
||||||
bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token);
|
|
||||||
|
|
||||||
bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token token);
|
|
||||||
|
|
||||||
llama_token llama_token_bos_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_eos_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_eot_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_eom_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_cls_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
|
|
||||||
|
|
||||||
llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
|
|
||||||
|
|
||||||
llama_token llama_token_fim_pre_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_fim_suf_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_fim_mid_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_fim_pad_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_fim_rep_impl(const struct llama_vocab & vocab);
|
|
||||||
llama_token llama_token_fim_sep_impl(const struct llama_vocab & vocab);
|
|
||||||
|
|
||||||
bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
|
|
||||||
bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
|
|
||||||
|
|
||||||
int32_t llama_tokenize_impl(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
const char * text,
|
|
||||||
int32_t text_len,
|
|
||||||
llama_token * tokens,
|
|
||||||
int32_t n_tokens_max,
|
|
||||||
bool add_special,
|
|
||||||
bool parse_special);
|
|
||||||
|
|
||||||
// does not write null-terminator to buf
|
|
||||||
int32_t llama_token_to_piece_impl(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
llama_token token,
|
|
||||||
char * buf,
|
|
||||||
int32_t length,
|
|
||||||
int32_t lstrip,
|
|
||||||
bool special);
|
|
||||||
|
|
||||||
// check if token0 is contained as a prefix in token1
|
|
||||||
bool llama_token_is_prefix_impl(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
llama_token token0,
|
|
||||||
llama_token token1);
|
|
||||||
|
|
||||||
int32_t llama_detokenize_impl(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
const llama_token * tokens,
|
|
||||||
int32_t n_tokens,
|
|
||||||
char * text,
|
|
||||||
int32_t text_len_max,
|
|
||||||
bool remove_special,
|
|
||||||
bool unparse_special);
|
|
||||||
|
|
||||||
std::string llama_detokenize(
|
|
||||||
const struct llama_vocab & vocab,
|
|
||||||
const std::vector<llama_token> & tokens,
|
|
||||||
bool special);
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -35,10 +35,10 @@ static std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const
|
|||||||
|
|
||||||
static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
|
static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
|
||||||
std::vector<char> result(8, 0);
|
std::vector<char> result(8, 0);
|
||||||
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), 0, false);
|
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
|
||||||
if (n_tokens < 0) {
|
if (n_tokens < 0) {
|
||||||
result.resize(-n_tokens);
|
result.resize(-n_tokens);
|
||||||
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), 0, false);
|
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
|
||||||
GGML_ASSERT(check == -n_tokens);
|
GGML_ASSERT(check == -n_tokens);
|
||||||
} else {
|
} else {
|
||||||
result.resize(n_tokens);
|
result.resize(n_tokens);
|
||||||
@ -314,6 +314,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// tune these to your liking
|
// tune these to your liking
|
||||||
lcparams.n_ctx = 2048;
|
lcparams.n_ctx = 2048;
|
||||||
|
lcparams.seed = 1;
|
||||||
lcparams.n_threads = params.n_threads;
|
lcparams.n_threads = params.n_threads;
|
||||||
lcparams.flash_attn = params.flash_attn;
|
lcparams.flash_attn = params.flash_attn;
|
||||||
|
|
||||||
@ -401,26 +402,6 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
llama_batch batch = llama_batch_init(llama_n_ctx(ctx_llama), 0, 1);
|
llama_batch batch = llama_batch_init(llama_n_ctx(ctx_llama), 0, 1);
|
||||||
|
|
||||||
// init sampler
|
|
||||||
const float top_k = 5;
|
|
||||||
const float top_p = 0.80f;
|
|
||||||
const float temp = 0.30f;
|
|
||||||
|
|
||||||
const int seed = 0;
|
|
||||||
|
|
||||||
auto sparams = llama_sampler_chain_default_params();
|
|
||||||
|
|
||||||
llama_sampler * smpl = llama_sampler_chain_init(sparams);
|
|
||||||
|
|
||||||
if (temp > 0.0f) {
|
|
||||||
llama_sampler_chain_add(smpl, llama_sampler_init_top_k(top_k));
|
|
||||||
llama_sampler_chain_add(smpl, llama_sampler_init_top_p(top_p, 1));
|
|
||||||
llama_sampler_chain_add(smpl, llama_sampler_init_temp (temp));
|
|
||||||
llama_sampler_chain_add(smpl, llama_sampler_init_dist (seed));
|
|
||||||
} else {
|
|
||||||
llama_sampler_chain_add(smpl, llama_sampler_init_greedy());
|
|
||||||
}
|
|
||||||
|
|
||||||
// init session
|
// init session
|
||||||
std::string path_session = params.path_session;
|
std::string path_session = params.path_session;
|
||||||
std::vector<llama_token> session_tokens;
|
std::vector<llama_token> session_tokens;
|
||||||
@ -436,7 +417,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
session_tokens.resize(llama_n_ctx(ctx_llama));
|
session_tokens.resize(llama_n_ctx(ctx_llama));
|
||||||
size_t n_token_count_out = 0;
|
size_t n_token_count_out = 0;
|
||||||
if (!llama_state_load_file(ctx_llama, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
|
if (!llama_load_session_file(ctx_llama, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
|
||||||
fprintf(stderr, "%s: error: failed to load session file '%s'\n", __func__, path_session.c_str());
|
fprintf(stderr, "%s: error: failed to load session file '%s'\n", __func__, path_session.c_str());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -719,13 +700,54 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
{
|
{
|
||||||
// out of user input, sample next token
|
// out of user input, sample next token
|
||||||
|
const float top_k = 5;
|
||||||
|
const float top_p = 0.80f;
|
||||||
|
const float temp = 0.30f;
|
||||||
|
const float repeat_penalty = 1.1764f;
|
||||||
|
|
||||||
|
const int repeat_last_n = 256;
|
||||||
|
|
||||||
if (!path_session.empty() && need_to_save_session) {
|
if (!path_session.empty() && need_to_save_session) {
|
||||||
need_to_save_session = false;
|
need_to_save_session = false;
|
||||||
llama_state_save_file(ctx_llama, path_session.c_str(), session_tokens.data(), session_tokens.size());
|
llama_save_session_file(ctx_llama, path_session.c_str(), session_tokens.data(), session_tokens.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
const llama_token id = llama_sampler_sample(smpl, ctx_llama, -1);
|
llama_token id = 0;
|
||||||
|
|
||||||
|
{
|
||||||
|
auto logits = llama_get_logits(ctx_llama);
|
||||||
|
auto n_vocab = llama_n_vocab(model_llama);
|
||||||
|
|
||||||
|
logits[llama_token_eos(model_llama)] = 0;
|
||||||
|
|
||||||
|
std::vector<llama_token_data> candidates;
|
||||||
|
candidates.reserve(n_vocab);
|
||||||
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||||
|
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||||
|
}
|
||||||
|
|
||||||
|
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
||||||
|
|
||||||
|
// apply repeat penalty
|
||||||
|
const float nl_logit = logits[llama_token_nl(model_llama)];
|
||||||
|
|
||||||
|
llama_sample_repetition_penalties(ctx_llama, &candidates_p,
|
||||||
|
embd_inp.data() + std::max(0, n_past - repeat_last_n),
|
||||||
|
repeat_last_n, repeat_penalty, 0.0, 0.0f);
|
||||||
|
|
||||||
|
logits[llama_token_nl(model_llama)] = nl_logit;
|
||||||
|
|
||||||
|
if (temp <= 0) {
|
||||||
|
// Greedy sampling
|
||||||
|
id = llama_sample_token_greedy(ctx_llama, &candidates_p);
|
||||||
|
} else {
|
||||||
|
// Temperature sampling
|
||||||
|
llama_sample_top_k(ctx_llama, &candidates_p, top_k, 1);
|
||||||
|
llama_sample_top_p(ctx_llama, &candidates_p, top_p, 1);
|
||||||
|
llama_sample_temp (ctx_llama, &candidates_p, temp);
|
||||||
|
id = llama_sample_token(ctx_llama, &candidates_p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (id != llama_token_eos(model_llama)) {
|
if (id != llama_token_eos(model_llama)) {
|
||||||
// add it to the context
|
// add it to the context
|
||||||
@ -775,14 +797,8 @@ int main(int argc, char ** argv) {
|
|||||||
whisper_print_timings(ctx_wsp);
|
whisper_print_timings(ctx_wsp);
|
||||||
whisper_free(ctx_wsp);
|
whisper_free(ctx_wsp);
|
||||||
|
|
||||||
llama_perf_sampler_print(smpl);
|
llama_print_timings(ctx_llama);
|
||||||
llama_perf_context_print(ctx_llama);
|
|
||||||
|
|
||||||
llama_sampler_free(smpl);
|
|
||||||
llama_batch_free(batch);
|
|
||||||
llama_free(ctx_llama);
|
llama_free(ctx_llama);
|
||||||
|
|
||||||
llama_backend_free();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
|
const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
|
||||||
{0x000000, 0x0080},
|
{0x000000, 0x0080},
|
||||||
{0x000020, 0x0008},
|
{0x000020, 0x0008},
|
||||||
{0x000021, 0x0020},
|
{0x000021, 0x0020},
|
||||||
@ -2311,8 +2311,7 @@ const std::unordered_set<uint32_t> unicode_set_whitespace = {
|
|||||||
0x003000,
|
0x003000,
|
||||||
};
|
};
|
||||||
|
|
||||||
// list is always in ascending order, to enable binary search
|
const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
|
||||||
const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = {
|
|
||||||
{0x000041, 0x000061},
|
{0x000041, 0x000061},
|
||||||
{0x000042, 0x000062},
|
{0x000042, 0x000062},
|
||||||
{0x000043, 0x000063},
|
{0x000043, 0x000063},
|
||||||
@ -3748,8 +3747,7 @@ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase
|
|||||||
{0x01E921, 0x01E943},
|
{0x01E921, 0x01E943},
|
||||||
};
|
};
|
||||||
|
|
||||||
// list is always in ascending order, to enable binary search
|
const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
|
||||||
const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = {
|
|
||||||
{0x000061, 0x000041},
|
{0x000061, 0x000041},
|
||||||
{0x000062, 0x000042},
|
{0x000062, 0x000042},
|
||||||
{0x000063, 0x000043},
|
{0x000063, 0x000043},
|
||||||
@ -5202,7 +5200,7 @@ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase
|
|||||||
{0x01E943, 0x01E921},
|
{0x01E943, 0x01E921},
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::initializer_list<range_nfd> unicode_ranges_nfd = { // start, last, nfd
|
const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
|
||||||
{0x000000, 0x000000, 0x000000},
|
{0x000000, 0x000000, 0x000000},
|
||||||
{0x0000C0, 0x0000C5, 0x000041},
|
{0x0000C0, 0x0000C5, 0x000041},
|
||||||
{0x0000C7, 0x0000C7, 0x000043},
|
{0x0000C7, 0x0000C7, 0x000043},
|
||||||
@ -7032,3 +7030,4 @@ const std::initializer_list<range_nfd> unicode_ranges_nfd = { // start, last, n
|
|||||||
{0x02FA1C, 0x02FA1C, 0x009F3B},
|
{0x02FA1C, 0x02FA1C, 0x009F3B},
|
||||||
{0x02FA1D, 0x02FA1D, 0x02A600},
|
{0x02FA1D, 0x02FA1D, 0x02A600},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -13,8 +13,8 @@ struct range_nfd {
|
|||||||
|
|
||||||
static const uint32_t MAX_CODEPOINTS = 0x110000;
|
static const uint32_t MAX_CODEPOINTS = 0x110000;
|
||||||
|
|
||||||
extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
|
extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
|
||||||
extern const std::unordered_set<uint32_t> unicode_set_whitespace;
|
extern const std::unordered_set<uint32_t> unicode_set_whitespace;
|
||||||
extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
|
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
|
||||||
extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
|
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
|
||||||
extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
|
extern const std::vector<range_nfd> unicode_ranges_nfd;
|
||||||
|
@ -1,11 +1,6 @@
|
|||||||
#if defined(_MSC_VER)
|
|
||||||
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "unicode.h"
|
#include "unicode.h"
|
||||||
#include "unicode-data.h"
|
#include "unicode-data.h"
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
@ -20,12 +15,6 @@
|
|||||||
#include <locale>
|
#include <locale>
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
|
|
||||||
size_t unicode_len_utf8(char src) {
|
|
||||||
const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
|
||||||
uint8_t highbits = static_cast<uint8_t>(src) >> 4;
|
|
||||||
return lookup[highbits];
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
|
static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
|
||||||
std::string result;
|
std::string result;
|
||||||
for (size_t i = 0; i < cps.size(); ++i) {
|
for (size_t i = 0; i < cps.size(); ++i) {
|
||||||
@ -34,7 +23,7 @@ static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
static uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
||||||
assert(offset < utf8.size());
|
assert(offset < utf8.size());
|
||||||
if (!(utf8[offset + 0] & 0x80)) {
|
if (!(utf8[offset + 0] & 0x80)) {
|
||||||
auto result = utf8[offset + 0];
|
auto result = utf8[offset + 0];
|
||||||
@ -123,11 +112,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
|||||||
static std::vector<codepoint_flags> unicode_cpt_flags_array() {
|
static std::vector<codepoint_flags> unicode_cpt_flags_array() {
|
||||||
std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
|
std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
|
||||||
|
|
||||||
assert (unicode_ranges_flags.begin()[0].first == 0);
|
assert (unicode_ranges_flags.front().first == 0);
|
||||||
assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
|
assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS);
|
||||||
for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
|
for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
|
||||||
const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
|
const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags
|
||||||
const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
|
const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags
|
||||||
for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
|
for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
|
||||||
cpt_flags[cpt] = range_ini.second;
|
cpt_flags[cpt] = range_ini.second;
|
||||||
}
|
}
|
||||||
@ -243,7 +232,8 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
||||||
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
|
static const codepoint_flags undef(codepoint_flags::UNDEFINED);
|
||||||
|
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : undef;
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t _prev_end = offset_ini;
|
size_t _prev_end = offset_ini;
|
||||||
@ -305,9 +295,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// regex: <space>?[^\s\p{L}\p{N}]+
|
// regex: <space>?[^\s\p{L}\p{N}]+
|
||||||
if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
|
if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
|
||||||
pos += (cpt == ' ');
|
pos += (cpt == ' ');
|
||||||
while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
|
while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
|
||||||
flags2 = _get_flags(++pos);
|
flags2 = _get_flags(++pos);
|
||||||
}
|
}
|
||||||
_add_token(pos);
|
_add_token(pos);
|
||||||
@ -361,7 +351,8 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
||||||
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
|
static const codepoint_flags undef(codepoint_flags::UNDEFINED);
|
||||||
|
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : undef;
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t _prev_end = offset_ini;
|
size_t _prev_end = offset_ini;
|
||||||
@ -403,8 +394,8 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// regex: [^\r\n\p{L}\p{N}]?\p{L}+
|
// regex: [^\r\n\p{L}\p{N}]?\p{L}+ //####FIXME: the first \p{L} is correct?
|
||||||
if (!(cpt == '\r' || cpt == '\n' || flags.is_number)) {
|
if (!(cpt == '\r' || cpt == '\n' || /*flags.is_letter |*/ flags.is_number)) {
|
||||||
if (flags.is_letter || _get_flags(pos+1).is_letter) { // one or more letters
|
if (flags.is_letter || _get_flags(pos+1).is_letter) { // one or more letters
|
||||||
pos++;
|
pos++;
|
||||||
while (_get_flags(pos).is_letter) {
|
while (_get_flags(pos).is_letter) {
|
||||||
@ -430,9 +421,9 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|||||||
|
|
||||||
// regex: <space>?[^\s\p{L}\p{N}]+[\r\n]*
|
// regex: <space>?[^\s\p{L}\p{N}]+[\r\n]*
|
||||||
auto flags2 = (cpt == ' ' ? _get_flags(pos+1) : flags);
|
auto flags2 = (cpt == ' ' ? _get_flags(pos+1) : flags);
|
||||||
if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags.as_uint()) {
|
if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
|
||||||
pos += (cpt == ' ');
|
pos += (cpt == ' ');
|
||||||
while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
|
while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
|
||||||
flags2 = _get_flags(++pos);
|
flags2 = _get_flags(++pos);
|
||||||
}
|
}
|
||||||
uint32_t cpt2 = _get_cpt(pos);
|
uint32_t cpt2 = _get_cpt(pos);
|
||||||
@ -597,7 +588,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|||||||
std::vector<uint32_t> result(cpts.size());
|
std::vector<uint32_t> result(cpts.size());
|
||||||
for (size_t i = 0; i < cpts.size(); ++i) {
|
for (size_t i = 0; i < cpts.size(); ++i) {
|
||||||
const uint32_t cpt = cpts[i];
|
const uint32_t cpt = cpts[i];
|
||||||
auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
|
auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
|
||||||
result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
|
result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@ -639,15 +630,8 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t unicode_tolower(uint32_t cp) {
|
uint32_t unicode_tolower(uint32_t cp) {
|
||||||
// binary search
|
auto it = unicode_map_lowercase.find(cp);
|
||||||
auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
|
return it == unicode_map_lowercase.end() ? cp : it->second;
|
||||||
[](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
|
|
||||||
return pair.first < value;
|
|
||||||
});
|
|
||||||
if (it != unicode_map_lowercase.end() && it->first == cp) {
|
|
||||||
return it->second;
|
|
||||||
}
|
|
||||||
return cp; // Return the original code point if no lowercase mapping is found
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
// TODO: prefix all symbols with "llama_"
|
|
||||||
|
|
||||||
struct codepoint_flags {
|
struct codepoint_flags {
|
||||||
enum {
|
enum {
|
||||||
UNDEFINED = 0x0001,
|
UNDEFINED = 0x0001,
|
||||||
@ -48,10 +46,8 @@ struct codepoint_flags {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t unicode_len_utf8(char src);
|
|
||||||
|
|
||||||
std::string unicode_cpt_to_utf8(uint32_t cp);
|
std::string unicode_cpt_to_utf8(uint32_t cp);
|
||||||
uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
|
|
||||||
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
||||||
|
|
||||||
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
|
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
|
||||||
|
@ -21,7 +21,7 @@ help()
|
|||||||
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
|
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
|
||||||
echo "options:"
|
echo "options:"
|
||||||
echo "-s Step in seconds (default is $step)."
|
echo "-s Step in seconds (default is $step)."
|
||||||
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' 'large-v3-turbo' (default is '$model')."
|
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
|
||||||
echo "-t Number of threads to use."
|
echo "-t Number of threads to use."
|
||||||
echo "-h Print this help page."
|
echo "-h Print this help page."
|
||||||
echo
|
echo
|
||||||
|
@ -7,9 +7,8 @@ set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)
|
|||||||
|
|
||||||
set(SOURCE_FILES
|
set(SOURCE_FILES
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
|
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
||||||
${WHISPER_LIB_DIR}/src/whisper.cpp
|
${WHISPER_LIB_DIR}/src/whisper.cpp
|
||||||
${CMAKE_SOURCE_DIR}/jni.c
|
${CMAKE_SOURCE_DIR}/jni.c
|
||||||
|
@ -19,16 +19,9 @@ if (NOT GGML_HOME)
|
|||||||
SOURCE_FILES
|
SOURCE_FILES
|
||||||
${SOURCE_FILES}
|
${SOURCE_FILES}
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
|
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-backend-reg.cpp
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-threading.cpp
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.c
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.cpp
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-aarch64.c
|
|
||||||
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
|
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
objects = {
|
objects = {
|
||||||
|
|
||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
18133C802C64E342005CEAAC /* ggml-aarch64.c in Sources */ = {isa = PBXBuildFile; fileRef = 18133C7F2C64E342005CEAAC /* ggml-aarch64.c */; };
|
|
||||||
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
|
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
|
||||||
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
|
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
|
||||||
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
|
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
|
||||||
@ -22,14 +21,8 @@
|
|||||||
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
||||||
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
||||||
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
||||||
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
|
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
|
||||||
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
||||||
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
|
|
||||||
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */; };
|
|
||||||
18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */; };
|
|
||||||
18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */; };
|
|
||||||
18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */; };
|
|
||||||
18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */; };
|
|
||||||
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
|
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
|
||||||
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
|
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
|
||||||
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
|
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
|
||||||
@ -51,12 +44,10 @@
|
|||||||
/* End PBXCopyFilesBuildPhase section */
|
/* End PBXCopyFilesBuildPhase section */
|
||||||
|
|
||||||
/* Begin PBXFileReference section */
|
/* Begin PBXFileReference section */
|
||||||
18133C7E2C64E342005CEAAC /* ggml-aarch64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-aarch64.h"; path = "../../../ggml/src/ggml-aarch64.h"; sourceTree = "<group>"; };
|
|
||||||
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-aarch64.c"; path = "../../../ggml/src/ggml-aarch64.c"; sourceTree = "<group>"; };
|
|
||||||
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
|
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
|
||||||
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
|
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
|
||||||
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
|
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal.m"; sourceTree = "<group>"; };
|
||||||
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
|
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal.metal"; sourceTree = "<group>"; };
|
||||||
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
|
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
|
||||||
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
|
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
|
||||||
@ -79,20 +70,9 @@
|
|||||||
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml/src/ggml-quants.h"; sourceTree = "<group>"; };
|
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml/src/ggml-quants.h"; sourceTree = "<group>"; };
|
||||||
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml/include/ggml-backend.h"; sourceTree = "<group>"; };
|
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml/include/ggml-backend.h"; sourceTree = "<group>"; };
|
||||||
18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml/src/ggml-backend-impl.h"; sourceTree = "<group>"; };
|
18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml/src/ggml-backend-impl.h"; sourceTree = "<group>"; };
|
||||||
18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
|
18ABE1572AF556340044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../../ggml/src/ggml-backend.c"; sourceTree = "<group>"; };
|
||||||
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
|
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
|
||||||
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
|
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
|
||||||
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.c"; sourceTree = "<group>"; };
|
|
||||||
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
|
|
||||||
18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-threading.h"; path = "../../../ggml/src/ggml-threading.h"; sourceTree = "<group>"; };
|
|
||||||
18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-threading.cpp"; path = "../../../ggml/src/ggml-threading.cpp"; sourceTree = "<group>"; };
|
|
||||||
18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.cpp"; sourceTree = "<group>"; };
|
|
||||||
18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-aarch64.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.h"; sourceTree = "<group>"; };
|
|
||||||
18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-aarch64.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.c"; sourceTree = "<group>"; };
|
|
||||||
18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-impl.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-impl.h"; sourceTree = "<group>"; };
|
|
||||||
18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-quants.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.h"; sourceTree = "<group>"; };
|
|
||||||
18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-quants.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.c"; sourceTree = "<group>"; };
|
|
||||||
18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-backend-reg.cpp"; path = "../../../ggml/src/ggml-backend-reg.cpp"; sourceTree = "<group>"; };
|
|
||||||
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
|
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
|
||||||
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
|
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
|
||||||
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
|
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
|
||||||
@ -132,23 +112,10 @@
|
|||||||
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
|
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */,
|
|
||||||
18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */,
|
|
||||||
18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */,
|
|
||||||
18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */,
|
|
||||||
18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */,
|
|
||||||
18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */,
|
|
||||||
18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */,
|
|
||||||
18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */,
|
|
||||||
18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */,
|
|
||||||
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
|
|
||||||
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
|
|
||||||
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */,
|
|
||||||
18133C7E2C64E342005CEAAC /* ggml-aarch64.h */,
|
|
||||||
18A275FF2C2A9563001C8D37 /* ggml-common.h */,
|
18A275FF2C2A9563001C8D37 /* ggml-common.h */,
|
||||||
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */,
|
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */,
|
||||||
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
|
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
|
||||||
18ABE1572AF556340044A204 /* ggml-backend.cpp */,
|
18ABE1572AF556340044A204 /* ggml-backend.c */,
|
||||||
18ABE1552AF556340044A204 /* ggml-backend.h */,
|
18ABE1552AF556340044A204 /* ggml-backend.h */,
|
||||||
18ABE1582AF556340044A204 /* ggml-impl.h */,
|
18ABE1582AF556340044A204 /* ggml-impl.h */,
|
||||||
18ABE1592AF556340044A204 /* ggml-quants.c */,
|
18ABE1592AF556340044A204 /* ggml-quants.c */,
|
||||||
@ -269,22 +236,15 @@
|
|||||||
files = (
|
files = (
|
||||||
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
|
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
|
||||||
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */,
|
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */,
|
||||||
18133C802C64E342005CEAAC /* ggml-aarch64.c in Sources */,
|
|
||||||
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */,
|
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */,
|
||||||
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
|
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
|
||||||
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
|
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
|
||||||
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
|
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
|
||||||
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
|
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
|
||||||
18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */,
|
|
||||||
18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */,
|
|
||||||
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
|
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
|
||||||
18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */,
|
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */,
|
||||||
18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */,
|
|
||||||
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
|
|
||||||
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
|
|
||||||
18627C8C29052BE000BD2A04 /* main.m in Sources */,
|
18627C8C29052BE000BD2A04 /* main.m in Sources */,
|
||||||
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
|
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
|
||||||
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
|
|
||||||
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
|
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
|
||||||
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
|
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
|
||||||
);
|
);
|
||||||
@ -363,8 +323,6 @@
|
|||||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||||
GENERATE_INFOPLIST_FILE = YES;
|
|
||||||
HEADER_SEARCH_PATHS = ../../../ggml/src/;
|
|
||||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||||
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
||||||
MTL_FAST_MATH = YES;
|
MTL_FAST_MATH = YES;
|
||||||
@ -418,8 +376,6 @@
|
|||||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||||
GENERATE_INFOPLIST_FILE = YES;
|
|
||||||
HEADER_SEARCH_PATHS = ../../../ggml/src/;
|
|
||||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||||
MTL_ENABLE_DEBUG_INFO = NO;
|
MTL_ENABLE_DEBUG_INFO = NO;
|
||||||
MTL_FAST_MATH = YES;
|
MTL_FAST_MATH = YES;
|
||||||
@ -432,6 +388,64 @@
|
|||||||
};
|
};
|
||||||
name = Release;
|
name = Release;
|
||||||
};
|
};
|
||||||
|
18627C9029052BE000BD2A04 /* Debug */ = {
|
||||||
|
isa = XCBuildConfiguration;
|
||||||
|
buildSettings = {
|
||||||
|
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||||
|
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||||
|
CODE_SIGN_STYLE = Automatic;
|
||||||
|
CURRENT_PROJECT_VERSION = 1;
|
||||||
|
DEVELOPMENT_TEAM = P8JZH34X63;
|
||||||
|
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
|
||||||
|
GENERATE_INFOPLIST_FILE = YES;
|
||||||
|
INFOPLIST_FILE = whisper.objc/Info.plist;
|
||||||
|
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||||
|
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
|
||||||
|
INFOPLIST_KEY_UIMainStoryboardFile = Main;
|
||||||
|
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||||
|
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||||
|
LD_RUNPATH_SEARCH_PATHS = (
|
||||||
|
"$(inherited)",
|
||||||
|
"@executable_path/Frameworks",
|
||||||
|
);
|
||||||
|
MARKETING_VERSION = 1.0;
|
||||||
|
MTL_HEADER_SEARCH_PATHS = "";
|
||||||
|
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
|
||||||
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
|
TARGETED_DEVICE_FAMILY = "1,2";
|
||||||
|
};
|
||||||
|
name = Debug;
|
||||||
|
};
|
||||||
|
18627C9129052BE000BD2A04 /* Release */ = {
|
||||||
|
isa = XCBuildConfiguration;
|
||||||
|
buildSettings = {
|
||||||
|
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||||
|
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||||
|
CODE_SIGN_STYLE = Automatic;
|
||||||
|
CURRENT_PROJECT_VERSION = 1;
|
||||||
|
DEVELOPMENT_TEAM = P8JZH34X63;
|
||||||
|
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
|
||||||
|
GENERATE_INFOPLIST_FILE = YES;
|
||||||
|
INFOPLIST_FILE = whisper.objc/Info.plist;
|
||||||
|
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||||
|
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
|
||||||
|
INFOPLIST_KEY_UIMainStoryboardFile = Main;
|
||||||
|
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||||
|
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||||
|
LD_RUNPATH_SEARCH_PATHS = (
|
||||||
|
"$(inherited)",
|
||||||
|
"@executable_path/Frameworks",
|
||||||
|
);
|
||||||
|
MARKETING_VERSION = 1.0;
|
||||||
|
MTL_HEADER_SEARCH_PATHS = "";
|
||||||
|
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
|
||||||
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
|
TARGETED_DEVICE_FAMILY = "1,2";
|
||||||
|
};
|
||||||
|
name = Release;
|
||||||
|
};
|
||||||
/* End XCBuildConfiguration section */
|
/* End XCBuildConfiguration section */
|
||||||
|
|
||||||
/* Begin XCConfigurationList section */
|
/* Begin XCConfigurationList section */
|
||||||
@ -444,6 +458,15 @@
|
|||||||
defaultConfigurationIsVisible = 0;
|
defaultConfigurationIsVisible = 0;
|
||||||
defaultConfigurationName = Release;
|
defaultConfigurationName = Release;
|
||||||
};
|
};
|
||||||
|
18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */ = {
|
||||||
|
isa = XCConfigurationList;
|
||||||
|
buildConfigurations = (
|
||||||
|
18627C9029052BE000BD2A04 /* Debug */,
|
||||||
|
18627C9129052BE000BD2A04 /* Release */,
|
||||||
|
);
|
||||||
|
defaultConfigurationIsVisible = 0;
|
||||||
|
defaultConfigurationName = Release;
|
||||||
|
};
|
||||||
/* End XCConfigurationList section */
|
/* End XCConfigurationList section */
|
||||||
};
|
};
|
||||||
rootObject = 18627C6E29052BDF00BD2A04 /* Project object */;
|
rootObject = 18627C6E29052BDF00BD2A04 /* Project object */;
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
import UIKit
|
|
||||||
import whisper
|
import whisper
|
||||||
|
|
||||||
enum WhisperError: Error {
|
enum WhisperError: Error {
|
||||||
@ -56,91 +55,11 @@ actor WhisperContext {
|
|||||||
return transcription
|
return transcription
|
||||||
}
|
}
|
||||||
|
|
||||||
static func benchMemcpy(nThreads: Int32) async -> String {
|
|
||||||
return String.init(cString: whisper_bench_memcpy_str(nThreads))
|
|
||||||
}
|
|
||||||
|
|
||||||
static func benchGgmlMulMat(nThreads: Int32) async -> String {
|
|
||||||
return String.init(cString: whisper_bench_ggml_mul_mat_str(nThreads))
|
|
||||||
}
|
|
||||||
|
|
||||||
private func systemInfo() -> String {
|
|
||||||
var info = ""
|
|
||||||
if (ggml_cpu_has_neon() != 0) { info += "NEON " }
|
|
||||||
return String(info.dropLast())
|
|
||||||
}
|
|
||||||
|
|
||||||
func benchFull(modelName: String, nThreads: Int32) async -> String {
|
|
||||||
let nMels = whisper_model_n_mels(context)
|
|
||||||
if (whisper_set_mel(context, nil, 0, nMels) != 0) {
|
|
||||||
return "error: failed to set mel"
|
|
||||||
}
|
|
||||||
|
|
||||||
// heat encoder
|
|
||||||
if (whisper_encode(context, 0, nThreads) != 0) {
|
|
||||||
return "error: failed to encode"
|
|
||||||
}
|
|
||||||
|
|
||||||
var tokens = [whisper_token](repeating: 0, count: 512)
|
|
||||||
|
|
||||||
// prompt heat
|
|
||||||
if (whisper_decode(context, &tokens, 256, 0, nThreads) != 0) {
|
|
||||||
return "error: failed to decode"
|
|
||||||
}
|
|
||||||
|
|
||||||
// text-generation heat
|
|
||||||
if (whisper_decode(context, &tokens, 1, 256, nThreads) != 0) {
|
|
||||||
return "error: failed to decode"
|
|
||||||
}
|
|
||||||
|
|
||||||
whisper_reset_timings(context)
|
|
||||||
|
|
||||||
// actual run
|
|
||||||
if (whisper_encode(context, 0, nThreads) != 0) {
|
|
||||||
return "error: failed to encode"
|
|
||||||
}
|
|
||||||
|
|
||||||
// text-generation
|
|
||||||
for i in 0..<256 {
|
|
||||||
if (whisper_decode(context, &tokens, 1, Int32(i), nThreads) != 0) {
|
|
||||||
return "error: failed to decode"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// batched decoding
|
|
||||||
for _ in 0..<64 {
|
|
||||||
if (whisper_decode(context, &tokens, 5, 0, nThreads) != 0) {
|
|
||||||
return "error: failed to decode"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// prompt processing
|
|
||||||
for _ in 0..<16 {
|
|
||||||
if (whisper_decode(context, &tokens, 256, 0, nThreads) != 0) {
|
|
||||||
return "error: failed to decode"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
whisper_print_timings(context)
|
|
||||||
|
|
||||||
let deviceModel = await UIDevice.current.model
|
|
||||||
let systemName = await UIDevice.current.systemName
|
|
||||||
let systemInfo = self.systemInfo()
|
|
||||||
let timings: whisper_timings = whisper_get_timings(context).pointee
|
|
||||||
let encodeMs = String(format: "%.2f", timings.encode_ms)
|
|
||||||
let decodeMs = String(format: "%.2f", timings.decode_ms)
|
|
||||||
let batchdMs = String(format: "%.2f", timings.batchd_ms)
|
|
||||||
let promptMs = String(format: "%.2f", timings.prompt_ms)
|
|
||||||
return "| \(deviceModel) | \(systemName) | \(systemInfo) | \(modelName) | \(nThreads) | 1 | \(encodeMs) | \(decodeMs) | \(batchdMs) | \(promptMs) | <todo> |"
|
|
||||||
}
|
|
||||||
|
|
||||||
static func createContext(path: String) throws -> WhisperContext {
|
static func createContext(path: String) throws -> WhisperContext {
|
||||||
var params = whisper_context_default_params()
|
var params = whisper_context_default_params()
|
||||||
#if targetEnvironment(simulator)
|
#if targetEnvironment(simulator)
|
||||||
params.use_gpu = false
|
params.use_gpu = false
|
||||||
print("Running on the simulator, using CPU")
|
print("Running on the simulator, using CPU")
|
||||||
#else
|
|
||||||
params.flash_attn = true // Enabled by default for Metal
|
|
||||||
#endif
|
#endif
|
||||||
let context = whisper_init_from_file_with_params(path, params)
|
let context = whisper_init_from_file_with_params(path, params)
|
||||||
if let context {
|
if let context {
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
import Foundation
|
|
||||||
|
|
||||||
struct Model: Identifiable {
|
|
||||||
var id = UUID()
|
|
||||||
var name: String
|
|
||||||
var info: String
|
|
||||||
var url: String
|
|
||||||
|
|
||||||
var filename: String
|
|
||||||
var fileURL: URL {
|
|
||||||
FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent(filename)
|
|
||||||
}
|
|
||||||
|
|
||||||
func fileExists() -> Bool {
|
|
||||||
FileManager.default.fileExists(atPath: fileURL.path)
|
|
||||||
}
|
|
||||||
}
|
|
@ -14,7 +14,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
|||||||
private var recordedFile: URL? = nil
|
private var recordedFile: URL? = nil
|
||||||
private var audioPlayer: AVAudioPlayer?
|
private var audioPlayer: AVAudioPlayer?
|
||||||
|
|
||||||
private var builtInModelUrl: URL? {
|
private var modelUrl: URL? {
|
||||||
Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin", subdirectory: "models")
|
Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin", subdirectory: "models")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,59 +28,23 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
|||||||
|
|
||||||
override init() {
|
override init() {
|
||||||
super.init()
|
super.init()
|
||||||
loadModel()
|
|
||||||
}
|
|
||||||
|
|
||||||
func loadModel(path: URL? = nil, log: Bool = true) {
|
|
||||||
do {
|
do {
|
||||||
whisperContext = nil
|
try loadModel()
|
||||||
if (log) { messageLog += "Loading model...\n" }
|
|
||||||
let modelUrl = path ?? builtInModelUrl
|
|
||||||
if let modelUrl {
|
|
||||||
whisperContext = try WhisperContext.createContext(path: modelUrl.path())
|
|
||||||
if (log) { messageLog += "Loaded model \(modelUrl.lastPathComponent)\n" }
|
|
||||||
} else {
|
|
||||||
if (log) { messageLog += "Could not locate model\n" }
|
|
||||||
}
|
|
||||||
canTranscribe = true
|
canTranscribe = true
|
||||||
} catch {
|
} catch {
|
||||||
print(error.localizedDescription)
|
print(error.localizedDescription)
|
||||||
if (log) { messageLog += "\(error.localizedDescription)\n" }
|
messageLog += "\(error.localizedDescription)\n"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func benchCurrentModel() async {
|
private func loadModel() throws {
|
||||||
if whisperContext == nil {
|
messageLog += "Loading model...\n"
|
||||||
messageLog += "Cannot bench without loaded model\n"
|
if let modelUrl {
|
||||||
return
|
whisperContext = try WhisperContext.createContext(path: modelUrl.path())
|
||||||
|
messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
|
||||||
|
} else {
|
||||||
|
messageLog += "Could not locate model\n"
|
||||||
}
|
}
|
||||||
messageLog += "Running benchmark for loaded model\n"
|
|
||||||
let result = await whisperContext?.benchFull(modelName: "<current>", nThreads: Int32(min(4, cpuCount())))
|
|
||||||
if (result != nil) { messageLog += result! + "\n" }
|
|
||||||
}
|
|
||||||
|
|
||||||
func bench(models: [Model]) async {
|
|
||||||
let nThreads = Int32(min(4, cpuCount()))
|
|
||||||
|
|
||||||
// messageLog += "Running memcpy benchmark\n"
|
|
||||||
// messageLog += await WhisperContext.benchMemcpy(nThreads: nThreads) + "\n"
|
|
||||||
//
|
|
||||||
// messageLog += "Running ggml_mul_mat benchmark with \(nThreads) threads\n"
|
|
||||||
// messageLog += await WhisperContext.benchGgmlMulMat(nThreads: nThreads) + "\n"
|
|
||||||
|
|
||||||
messageLog += "Running benchmark for all downloaded models\n"
|
|
||||||
messageLog += "| CPU | OS | Config | Model | Th | FA | Enc. | Dec. | Bch5 | PP | Commit |\n"
|
|
||||||
messageLog += "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n"
|
|
||||||
for model in models {
|
|
||||||
loadModel(path: model.fileURL, log: false)
|
|
||||||
if whisperContext == nil {
|
|
||||||
messageLog += "Cannot bench without loaded model\n"
|
|
||||||
break
|
|
||||||
}
|
|
||||||
let result = await whisperContext?.benchFull(modelName: model.name, nThreads: nThreads)
|
|
||||||
if (result != nil) { messageLog += result! + "\n" }
|
|
||||||
}
|
|
||||||
messageLog += "Benchmarking completed\n"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func transcribeSample() async {
|
func transcribeSample() async {
|
||||||
@ -196,8 +160,3 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
|||||||
isRecording = false
|
isRecording = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fileprivate func cpuCount() -> Int {
|
|
||||||
ProcessInfo.processInfo.processorCount
|
|
||||||
}
|
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import SwiftUI
|
import SwiftUI
|
||||||
import AVFoundation
|
import AVFoundation
|
||||||
import Foundation
|
|
||||||
|
|
||||||
struct ContentView: View {
|
struct ContentView: View {
|
||||||
@StateObject var whisperState = WhisperState()
|
@StateObject var whisperState = WhisperState()
|
||||||
@ -30,125 +29,15 @@ struct ContentView: View {
|
|||||||
Text(verbatim: whisperState.messageLog)
|
Text(verbatim: whisperState.messageLog)
|
||||||
.frame(maxWidth: .infinity, alignment: .leading)
|
.frame(maxWidth: .infinity, alignment: .leading)
|
||||||
}
|
}
|
||||||
.font(.footnote)
|
|
||||||
.padding()
|
|
||||||
.background(Color.gray.opacity(0.1))
|
|
||||||
.cornerRadius(10)
|
|
||||||
|
|
||||||
HStack {
|
|
||||||
Button("Clear Logs", action: {
|
|
||||||
whisperState.messageLog = ""
|
|
||||||
})
|
|
||||||
.font(.footnote)
|
|
||||||
.buttonStyle(.bordered)
|
|
||||||
|
|
||||||
Button("Copy Logs", action: {
|
|
||||||
UIPasteboard.general.string = whisperState.messageLog
|
|
||||||
})
|
|
||||||
.font(.footnote)
|
|
||||||
.buttonStyle(.bordered)
|
|
||||||
|
|
||||||
Button("Bench", action: {
|
|
||||||
Task {
|
|
||||||
await whisperState.benchCurrentModel()
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.font(.footnote)
|
|
||||||
.buttonStyle(.bordered)
|
|
||||||
.disabled(!whisperState.canTranscribe)
|
|
||||||
|
|
||||||
Button("Bench All", action: {
|
|
||||||
Task {
|
|
||||||
await whisperState.bench(models: ModelsView.getDownloadedModels())
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.font(.footnote)
|
|
||||||
.buttonStyle(.bordered)
|
|
||||||
.disabled(!whisperState.canTranscribe)
|
|
||||||
}
|
|
||||||
|
|
||||||
NavigationLink(destination: ModelsView(whisperState: whisperState)) {
|
|
||||||
Text("View Models")
|
|
||||||
}
|
|
||||||
.font(.footnote)
|
|
||||||
.padding()
|
|
||||||
}
|
}
|
||||||
.navigationTitle("Whisper SwiftUI Demo")
|
.navigationTitle("Whisper SwiftUI Demo")
|
||||||
.padding()
|
.padding()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ModelsView: View {
|
|
||||||
@ObservedObject var whisperState: WhisperState
|
|
||||||
@Environment(\.dismiss) var dismiss
|
|
||||||
|
|
||||||
private static let models: [Model] = [
|
|
||||||
Model(name: "tiny", info: "(F16, 75 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", filename: "tiny.bin"),
|
|
||||||
Model(name: "tiny-q5_1", info: "(31 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin", filename: "tiny-q5_1.bin"),
|
|
||||||
Model(name: "tiny-q8_0", info: "(42 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q8_0.bin", filename: "tiny-q8_0.bin"),
|
|
||||||
Model(name: "tiny.en", info: "(F16, 75 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin", filename: "tiny.en.bin"),
|
|
||||||
Model(name: "tiny.en-q5_1", info: "(31 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin", filename: "tiny.en-q5_1.bin"),
|
|
||||||
Model(name: "tiny.en-q8_0", info: "(42 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q8_0.bin", filename: "tiny.en-q8_0.bin"),
|
|
||||||
Model(name: "base", info: "(F16, 142 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin", filename: "base.bin"),
|
|
||||||
Model(name: "base-q5_1", info: "(57 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin", filename: "base-q5_1.bin"),
|
|
||||||
Model(name: "base-q8_0", info: "(78 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q8_0.bin", filename: "base-q8_0.bin"),
|
|
||||||
Model(name: "base.en", info: "(F16, 142 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin", filename: "base.en.bin"),
|
|
||||||
Model(name: "base.en-q5_1", info: "(57 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin", filename: "base.en-q5_1.bin"),
|
|
||||||
Model(name: "base.en-q8_0", info: "(78 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q8_0.bin", filename: "base.en-q8_0.bin"),
|
|
||||||
Model(name: "small", info: "(F16, 466 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", filename: "small.bin"),
|
|
||||||
Model(name: "small-q5_1", info: "(181 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin", filename: "small-q5_1.bin"),
|
|
||||||
Model(name: "small-q8_0", info: "(252 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q8_0.bin", filename: "small-q8_0.bin"),
|
|
||||||
Model(name: "small.en", info: "(F16, 466 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin", filename: "small.en.bin"),
|
|
||||||
Model(name: "small.en-q5_1", info: "(181 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q5_1.bin", filename: "small.en-q5_1.bin"),
|
|
||||||
Model(name: "small.en-q8_0", info: "(252 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q8_0.bin", filename: "small.en-q8_0.bin"),
|
|
||||||
Model(name: "medium", info: "(F16, 1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin", filename: "medium.bin"),
|
|
||||||
Model(name: "medium-q5_0", info: "(514 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin", filename: "medium-q5_0.bin"),
|
|
||||||
Model(name: "medium-q8_0", info: "(785 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q8_0.bin", filename: "medium-q8_0.bin"),
|
|
||||||
Model(name: "medium.en", info: "(F16, 1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin", filename: "medium.en.bin"),
|
|
||||||
Model(name: "medium.en-q5_0", info: "(514 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin", filename: "medium.en-q5_0.bin"),
|
|
||||||
Model(name: "medium.en-q8_0", info: "(785 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q8_0.bin", filename: "medium.en-q8_0.bin"),
|
|
||||||
Model(name: "large-v1", info: "(F16, 2.9 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large.bin", filename: "large.bin"),
|
|
||||||
Model(name: "large-v2", info: "(F16, 2.9 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2.bin", filename: "large-v2.bin"),
|
|
||||||
Model(name: "large-v2-q5_0", info: "(1.1 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2-q5_0.bin", filename: "large-v2-q5_0.bin"),
|
|
||||||
Model(name: "large-v2-q8_0", info: "(1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2-q8_0.bin", filename: "large-v2-q8_0.bin"),
|
|
||||||
Model(name: "large-v3", info: "(F16, 2.9 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin", filename: "large-v3.bin"),
|
|
||||||
Model(name: "large-v3-q5_0", info: "(1.1 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-q5_0.bin", filename: "large-v3-q5_0.bin"),
|
|
||||||
Model(name: "large-v3-turbo", info: "(F16, 1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin", filename: "large-v3-turbo.bin"),
|
|
||||||
Model(name: "large-v3-turbo-q5_0", info: "(547 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin", filename: "large-v3-turbo-q5_0.bin"),
|
|
||||||
Model(name: "large-v3-turbo-q8_0", info: "(834 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q8_0.bin", filename: "large-v3-turbo-q8_0.bin"),
|
|
||||||
]
|
|
||||||
|
|
||||||
/// Returns the entries of `models` whose `fileURL` path currently exists on disk,
/// in the same order as they appear in `models`.
static func getDownloadedModels() -> [Model] {
    let fileManager = FileManager.default
    var downloaded: [Model] = []
    for candidate in models where fileManager.fileExists(atPath: candidate.fileURL.path()) {
        downloaded.append(candidate)
    }
    return downloaded
}
|
|
||||||
|
|
||||||
/// Callback handed to each `DownloadButton`: dismisses this view and asks
/// `whisperState` to load the file at the model's `fileURL`.
///
/// - Parameter model: The model whose file should be loaded.
func loadModel(model: Model) {
    // Both calls run inside an unstructured Task, dismissal first.
    Task {
        dismiss()
        whisperState.loadModel(path: model.fileURL)
    }
}
|
|
||||||
|
|
||||||
/// A grouped list with a single "Models" section holding one `DownloadButton`
/// per entry of `ModelsView.models`; each button reports back through `loadModel`.
var body: some View {
    List {
        Section(header: Text("Models")) {
            ForEach(ModelsView.models) { entry in
                DownloadButton(model: entry).onLoad(perform: loadModel)
            }
        }
    }
    .listStyle(GroupedListStyle())
    .navigationBarTitle("Models", displayMode: .inline)
    .toolbar {}
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//struct ContentView_Previews: PreviewProvider {
|
struct ContentView_Previews: PreviewProvider {
|
||||||
// static var previews: some View {
|
static var previews: some View {
|
||||||
// ContentView()
|
ContentView()
|
||||||
// }
|
}
|
||||||
//}
|
}
|
||||||
|
@ -1,102 +0,0 @@
|
|||||||
import SwiftUI
|
|
||||||
|
|
||||||
/// A list row that downloads, loads, or deletes the file of a single model.
///
/// The row is driven by `status`, which holds one of three strings:
/// - `"download"`:    the file is not present; tapping starts a download.
/// - `"downloading"`: a transfer is in flight; tapping cancels it.
/// - `"downloaded"`:  the file is present; tapping invokes the `onLoad` callback,
///                    and a swipe action offers to delete the file.
struct DownloadButton: View {
    private var model: Model

    @State private var status: String

    @State private var downloadTask: URLSessionDownloadTask?
    @State private var progress = 0.0
    @State private var observation: NSKeyValueObservation?

    private var onLoad: ((_ model: Model) -> Void)?

    /// Creates the row; the initial status reflects whether the model file already exists.
    init(model: Model) {
        self.model = model
        status = model.fileExists() ? "downloaded" : "download"
    }

    /// Returns a copy of this button that calls `action` when a downloaded model is tapped.
    func onLoad(perform action: @escaping (_ model: Model) -> Void) -> DownloadButton {
        var button = self
        button.onLoad = action
        return button
    }

    /// Publishes a new status on the main queue.
    ///
    /// The download completion handler is not guaranteed to run on the main
    /// thread, while view state must only be changed there.
    private func setStatus(_ newStatus: String) {
        DispatchQueue.main.async {
            status = newStatus
        }
    }

    /// Starts downloading the model file and tracks the transfer's progress.
    ///
    /// Every failure path resets the status to `"download"` so the row never
    /// stays stuck in `"downloading"`.
    private func download() {
        // Validate the URL before switching state, so a bad URL cannot strand the row.
        guard let url = URL(string: model.url) else {
            print("Error: invalid model URL \(model.url)")
            return
        }

        status = "downloading"
        progress = 0.0 // do not show the percentage of a previous, cancelled transfer
        print("Downloading model \(model.name) from \(model.url)")

        let task = URLSession.shared.downloadTask(with: url) { temporaryURL, response, error in
            if let error = error {
                // A cancellation was requested by this view, which already updated
                // the status itself; touching it here could clobber a newer download.
                if (error as NSError).code == NSURLErrorCancelled {
                    return
                }
                print("Error: \(error.localizedDescription)")
                setStatus("download")
                return
            }

            guard let response = response as? HTTPURLResponse, (200...299).contains(response.statusCode) else {
                print("Server error!")
                setStatus("download")
                return
            }

            guard let temporaryURL = temporaryURL else {
                setStatus("download")
                return
            }

            do {
                // The copy must happen inside this handler: the temporary file is
                // only guaranteed to exist until the handler returns.
                try FileManager.default.copyItem(at: temporaryURL, to: model.fileURL)
                print("Writing to \(model.filename) completed")
                setStatus("downloaded")
            } catch let err {
                print("Error: \(err.localizedDescription)")
                setStatus("download")
            }
        }

        // Observe the local task rather than re-reading the @State property that
        // is assigned just below.
        observation = task.progress.observe(\.fractionCompleted) { observed, _ in
            let fraction = observed.fractionCompleted
            DispatchQueue.main.async {
                progress = fraction
            }
        }

        downloadTask = task
        task.resume()
    }

    /// Handles a tap on the row according to the current status.
    private func handleTap() {
        if (status == "download") {
            download()
        } else if (status == "downloading") {
            downloadTask?.cancel()
            status = "download"
        } else if (status == "downloaded") {
            if !model.fileExists() {
                // The file vanished behind our back: fetch it again and do NOT
                // report a load, since there is nothing to load yet.
                download()
                return
            }
            onLoad?(model)
        }
    }

    var body: some View {
        VStack {
            Button(action: handleTap) {
                let title = "\(model.name) \(model.info)"
                if (status == "download") {
                    Text("Download \(title)")
                } else if (status == "downloading") {
                    Text("\(title) (Downloading \(Int(progress * 100))%)")
                } else if (status == "downloaded") {
                    Text("Load \(title)")
                } else {
                    Text("Unknown status")
                }
            }.swipeActions {
                if (status == "downloaded") {
                    Button("Delete") {
                        do {
                            try FileManager.default.removeItem(at: model.fileURL)
                        } catch {
                            print("Error deleting file: \(error)")
                        }
                        // Reflect what is actually on disk: if the removal failed
                        // the model is still there and can still be loaded.
                        status = model.fileExists() ? "downloaded" : "download"
                    }
                    .tint(.red)
                }
            }
        }
        .onDisappear() {
            downloadTask?.cancel()
            // The cancelled transfer will not complete, so leave the in-flight state.
            if (status == "downloading") {
                status = "download"
            }
        }
    }
}
|
|
@ -17,8 +17,6 @@
|
|||||||
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
||||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
||||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
||||||
7F79E0EE2CE0A78000ACD7BF /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */; };
|
|
||||||
7F79E0F02CE0C6F700ACD7BF /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0EF2CE0C6F700ACD7BF /* Model.swift */; };
|
|
||||||
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
|
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
|
||||||
/* End PBXBuildFile section */
|
/* End PBXBuildFile section */
|
||||||
|
|
||||||
@ -35,8 +33,6 @@
|
|||||||
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
|
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
|
||||||
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
|
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
|
||||||
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
|
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
|
||||||
7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
|
||||||
7F79E0EF2CE0C6F700ACD7BF /* Model.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
|
|
||||||
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
|
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
|
||||||
/* End PBXFileReference section */
|
/* End PBXFileReference section */
|
||||||
|
|
||||||
@ -56,7 +52,6 @@
|
|||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */,
|
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */,
|
||||||
7F79E0EF2CE0C6F700ACD7BF /* Model.swift */,
|
|
||||||
);
|
);
|
||||||
path = Models;
|
path = Models;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
@ -124,7 +119,6 @@
|
|||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
0AAC5D9C29539CCF003032C3 /* ContentView.swift */,
|
0AAC5D9C29539CCF003032C3 /* ContentView.swift */,
|
||||||
7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */,
|
|
||||||
);
|
);
|
||||||
path = UI;
|
path = UI;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
@ -226,9 +220,7 @@
|
|||||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
||||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
||||||
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
||||||
7F79E0EE2CE0A78000ACD7BF /* DownloadButton.swift in Sources */,
|
|
||||||
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
||||||
7F79E0F02CE0C6F700ACD7BF /* Model.swift in Sources */,
|
|
||||||
);
|
);
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
};
|
};
|
||||||
@ -378,9 +370,7 @@
|
|||||||
PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
|
PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
|
||||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
SDKROOT = auto;
|
SDKROOT = auto;
|
||||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
|
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||||
SUPPORTS_MACCATALYST = NO;
|
|
||||||
SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = YES;
|
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||||
SWIFT_VERSION = 5.0;
|
SWIFT_VERSION = 5.0;
|
||||||
@ -425,9 +415,7 @@
|
|||||||
PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
|
PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
|
||||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
SDKROOT = auto;
|
SDKROOT = auto;
|
||||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
|
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||||
SUPPORTS_MACCATALYST = NO;
|
|
||||||
SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = YES;
|
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
SWIFT_VERSION = 5.0;
|
SWIFT_VERSION = 5.0;
|
||||||
TARGETED_DEVICE_FAMILY = "1,2";
|
TARGETED_DEVICE_FAMILY = "1,2";
|
||||||
|
@ -50,24 +50,9 @@ else()
|
|||||||
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
|
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (CMAKE_CROSSCOMPILING)
|
|
||||||
set(GGML_NATIVE_DEFAULT OFF)
|
|
||||||
else()
|
|
||||||
set(GGML_NATIVE_DEFAULT ON)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# defaults
|
|
||||||
if (NOT GGML_LLAMAFILE_DEFAULT)
|
|
||||||
set(GGML_LLAMAFILE_DEFAULT OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (NOT GGML_CUDA_GRAPHS_DEFAULT)
|
|
||||||
set(GGML_CUDA_GRAPHS_DEFAULT OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# general
|
# general
|
||||||
option(GGML_STATIC "ggml: static link libraries" OFF)
|
option(GGML_STATIC "ggml: static link libraries" OFF)
|
||||||
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
|
option(GGML_NATIVE "ggml: enable -march=native flag" ON)
|
||||||
option(GGML_LTO "ggml: enable link time optimization" OFF)
|
option(GGML_LTO "ggml: enable link time optimization" OFF)
|
||||||
option(GGML_CCACHE "ggml: use ccache if available" ON)
|
option(GGML_CCACHE "ggml: use ccache if available" ON)
|
||||||
|
|
||||||
@ -85,14 +70,13 @@ option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
|
|||||||
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
|
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
|
||||||
|
|
||||||
# instruction set specific
|
# instruction set specific
|
||||||
if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT)
|
if (GGML_NATIVE)
|
||||||
set(INS_ENB OFF)
|
set(INS_ENB OFF)
|
||||||
else()
|
else()
|
||||||
set(INS_ENB ON)
|
set(INS_ENB ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
||||||
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
|
|
||||||
|
|
||||||
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
||||||
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
||||||
@ -100,9 +84,6 @@ option(GGML_AVX512 "ggml: enable AVX512" OFF)
|
|||||||
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
|
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
|
||||||
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
|
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
|
||||||
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
|
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
|
||||||
option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
|
|
||||||
option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
|
|
||||||
option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
|
|
||||||
option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
|
option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
|
option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
|
||||||
@ -117,40 +98,39 @@ endif()
|
|||||||
|
|
||||||
# ggml core
|
# ggml core
|
||||||
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
|
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
|
||||||
option(GGML_CPU "ggml: enable CPU backend" ON)
|
|
||||||
|
|
||||||
# 3rd party libs / backends
|
# 3rd party libs / backends
|
||||||
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
|
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
|
||||||
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
|
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
|
||||||
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
|
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
|
||||||
"ggml: BLAS library vendor")
|
"ggml: BLAS library vendor")
|
||||||
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
|
option(GGML_LLAMAFILE "ggml: use ggml SGEMM" OFF)
|
||||||
|
|
||||||
option(GGML_CUDA "ggml: use CUDA" OFF)
|
option(GGML_CUDA "ggml: use CUDA" OFF)
|
||||||
option(GGML_MUSA "ggml: use MUSA" OFF)
|
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
|
||||||
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
|
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
|
||||||
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
|
set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
|
||||||
|
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
|
||||||
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
|
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
|
||||||
|
set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING
|
||||||
|
"ggml: iters./thread per block for Q2_K/Q6_K")
|
||||||
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
||||||
"ggml: max. batch size for using peer access")
|
"ggml: max. batch size for using peer access")
|
||||||
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
|
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
|
||||||
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
||||||
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
||||||
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
|
||||||
|
|
||||||
option(GGML_HIP "ggml: use HIP" OFF)
|
option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)
|
||||||
|
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
|
||||||
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
||||||
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
||||||
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
||||||
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
||||||
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
|
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
|
||||||
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
|
||||||
option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
|
|
||||||
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
||||||
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
||||||
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
||||||
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
||||||
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
|
|
||||||
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
||||||
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
|
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
|
||||||
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
|
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
|
||||||
@ -159,13 +139,10 @@ set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
|
|||||||
set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
|
set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
|
||||||
option(GGML_OPENMP "ggml: use OpenMP" ON)
|
option(GGML_OPENMP "ggml: use OpenMP" ON)
|
||||||
option(GGML_RPC "ggml: use RPC" OFF)
|
option(GGML_RPC "ggml: use RPC" OFF)
|
||||||
option(GGML_AMX "ggml: use AMX" OFF)
|
|
||||||
option(GGML_SYCL "ggml: use SYCL" OFF)
|
option(GGML_SYCL "ggml: use SYCL" OFF)
|
||||||
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
||||||
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
||||||
"ggml: sycl target device")
|
"ggml: sycl target device")
|
||||||
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
|
||||||
"ggml: sycl device architecture")
|
|
||||||
|
|
||||||
# extra artifacts
|
# extra artifacts
|
||||||
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
|
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
|
||||||
@ -215,34 +192,27 @@ endif ()
|
|||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
include(CMakePackageConfigHelpers)
|
include(CMakePackageConfigHelpers)
|
||||||
|
|
||||||
# all public headers
|
|
||||||
set(GGML_PUBLIC_HEADERS
|
set(GGML_PUBLIC_HEADERS
|
||||||
include/ggml.h
|
include/ggml.h
|
||||||
include/ggml-cpu.h
|
|
||||||
include/ggml-alloc.h
|
include/ggml-alloc.h
|
||||||
include/ggml-backend.h
|
include/ggml-backend.h
|
||||||
include/ggml-blas.h
|
"${GGML_HEADERS_CUDA}"
|
||||||
include/ggml-cann.h
|
"${GGML_HEADERS_METAL}"
|
||||||
include/ggml-cuda.h
|
"${GGML_HEADERS_EXTRA}")
|
||||||
include/ggml-kompute.h
|
|
||||||
include/ggml-opt.h
|
|
||||||
include/ggml-metal.h
|
|
||||||
include/ggml-rpc.h
|
|
||||||
include/ggml-sycl.h
|
|
||||||
include/ggml-vulkan.h)
|
|
||||||
|
|
||||||
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
||||||
#if (GGML_METAL)
|
#if (GGML_METAL)
|
||||||
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
|
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
|
||||||
#endif()
|
#endif()
|
||||||
install(TARGETS ggml LIBRARY PUBLIC_HEADER)
|
install(TARGETS ggml PUBLIC_HEADER)
|
||||||
install(TARGETS ggml-base LIBRARY)
|
|
||||||
|
if (BUILD_SHARED_LIBS)
|
||||||
|
install(TARGETS ggml LIBRARY)
|
||||||
|
endif()
|
||||||
|
|
||||||
# FIXME: this should be done in the backend cmake files
|
|
||||||
if (GGML_METAL)
|
if (GGML_METAL)
|
||||||
# FIXME: does this need to be installed with GGML_METAL_EMBED_LIBRARY?
|
|
||||||
install(
|
install(
|
||||||
FILES src/ggml-metal/ggml-metal.metal
|
FILES src/ggml-metal.metal
|
||||||
PERMISSIONS
|
PERMISSIONS
|
||||||
OWNER_READ
|
OWNER_READ
|
||||||
OWNER_WRITE
|
OWNER_WRITE
|
||||||
|
220
ggml/ggml_vk_generate_shaders.py
Normal file
220
ggml/ggml_vk_generate_shaders.py
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from tempfile import gettempdir
|
||||||
|
|
||||||
|
logger = logging.getLogger("ggml-vk-generate-shaders")

# Shader compiler executable; the command-line entry point may override it via --glslc.
GLSLC = "glslc"

# Tensor types for which per-type shader variants are generated.
type_names = [
    "f32", "f16",
    "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
    "q2_k", "q3_k", "q4_k", "q5_k", "q6_k",
]

# Upper bound on the number of compile jobs awaited at the same time.
ASYNCIO_CONCURRENCY = 64

# Shader sources are read from input_dir; compiled .spv files go to output_dir.
input_dir = "vulkan-shaders"
output_dir = gettempdir()

# shader_fnames collects (name, spv_path) pairs; appends are done while holding lock.
lock = asyncio.Lock()
shader_fnames = []
|
||||||
|
|
||||||
|
|
||||||
|
async def string_to_spv(name, in_fname, defines, fp16=True):
    """Compile one shader source to SPIR-V with GLSLC and record the result.

    Args:
        name: Base name of the shader; "_fp32" is appended when fp16 is false.
        in_fname: Source file name, resolved relative to input_dir.
        defines: Mapping of preprocessor names to values, passed as -DKEY=VALUE.
        fp16: Only affects the generated name (see above).

    On success the pair (name, output path) is appended to shader_fnames.
    On a non-zero compiler exit status the failure is logged and nothing is
    recorded; no exception is raised for it.
    """
    if not fp16:
        name = f"{name}_fp32"

    in_path = os.path.join(input_dir, in_fname)
    out_fname = os.path.join(output_dir, f"{name}.spv")

    cmd = [GLSLC, "-fshader-stage=compute", "--target-env=vulkan1.2", "-O", in_path, "-o", out_fname]
    for key, value in defines.items():
        cmd.append(f"-D{key}={value}")

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    # Both streams are decoded; only the error text is used afterwards.
    _stdout, error = (stream.decode() for stream in await proc.communicate())

    if proc.returncode:
        cmd_text = " ".join(cmd)
        logger.error(f"cannot compile {name}\n\n{cmd_text}\n\n{error}")
        return

    async with lock:
        shader_fnames.append((name, out_fname))
|
||||||
|
|
||||||
|
|
||||||
|
def matmul_shaders(tasks, fp16, matmul_id):
    """Append the mul_mm.comp compile jobs for one (fp16, matmul_id) combination.

    Args:
        tasks: List that receives the string_to_spv coroutines.
        fp16: Selects the float16 defines and the wider aligned load types;
            it is also forwarded to string_to_spv.
        matmul_id: When true, MUL_MAT_ID is defined and the shaders are
            named "matmul_id_*" instead of "matmul_*".
    """
    if fp16:
        load_vec, aligned_b_type_f32, aligned_b_type_f16 = "8", "mat2x4", "f16mat2x4"
    else:
        load_vec, aligned_b_type_f32, aligned_b_type_f16 = "4", "vec4", "f16vec4"

    shader_name = "matmul_id" if matmul_id else "matmul"

    base_dict = {"FLOAT_TYPE": "float16_t" if fp16 else "float"}
    if matmul_id:
        base_dict["MUL_MAT_ID"] = "1"
    if fp16:
        base_dict["FLOAT16"] = "1"

    def queue(suffix, defines):
        # Every variant is built from the same source with the shared base defines first.
        tasks.append(string_to_spv(f"{shader_name}_{suffix}", "mul_mm.comp", base_dict | defines, fp16))

    # Shaders with f16 B_TYPE
    for a_suffix, a_define in (("f32_f16", "DATA_A_F32"), ("f16", "DATA_A_F16")):
        queue(a_suffix, {a_define: "1", "B_TYPE": "float16_t", "D_TYPE": "float"})
        queue(f"{a_suffix}_aligned", {a_define: "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"})

    # Shaders with f32 B_TYPE, one pair per A type
    for tname in type_names:
        data_a_key = f"DATA_A_{tname.upper()}"
        load_vec_a = "2" if tname not in ("f32", "f16") else load_vec
        queue(f"{tname}_f32", {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"})
        # NOTE(review): the aligned variant defines DATA_A_* as "2" rather than "1";
        # reproduced unchanged — confirm this is intentional.
        queue(f"{tname}_f32_aligned", {data_a_key: "2", "LOAD_VEC_A": load_vec_a, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f32, "D_TYPE": "float"})
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Compile every shader variant and emit ggml-vulkan-shaders.hpp.

    All compile jobs are created first, then awaited together with at most
    ASYNCIO_CONCURRENCY of them in flight. Each shader recorded in
    shader_fnames is then written into the header (in the current working
    directory) as a byte array plus a length constant, and its intermediate
    .spv file is removed.
    """
    logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V")

    tasks = []

    def queue(name, source, defines):
        tasks.append(string_to_spv(name, source, defines))

    for fp16 in (False, True):
        # MUL_MAT first, then MUL_MAT_ID
        for matmul_id in (False, True):
            matmul_shaders(tasks, fp16, matmul_id)

    base_dict = {"FLOAT_TYPE": "float"}

    for tname in type_names:
        data_a_key = f"DATA_A_{tname.upper()}"
        is_k_quant = tname.endswith("_k")

        # mul mat vec
        vec_shader = f"mul_mat_vec_{tname}.comp" if is_k_quant else "mul_mat_vec.comp"
        queue(f"mul_mat_vec_{tname}_f32_f32", vec_shader, base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"})
        queue(f"mul_mat_vec_{tname}_f16_f32", vec_shader, base_dict | {data_a_key: "1", "B_TYPE": "float16_t", "D_TYPE": "float"})
        queue(f"mul_mat_vec_id_{tname}_f32", vec_shader, base_dict | {"MUL_MAT_ID": "1", data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"})

        # Dequant shaders
        if tname != "f16":
            queue(f"dequant_{tname}", f"dequant_{tname}.comp", base_dict | {data_a_key: "1", "D_TYPE": "float16_t"})

        # get_rows (not generated for the k-quant types)
        if is_k_quant:
            continue

        rows_shader = "get_rows.comp" if tname in ("f32", "f16") else "get_rows_quant.comp"
        rows_f16_defines = {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t"}
        if tname == "f16":
            rows_f16_defines["OPTIMIZATION_ERROR_WORKAROUND"] = "1"
        queue(f"get_rows_{tname}", rows_shader, rows_f16_defines)
        queue(f"get_rows_{tname}_f32", rows_shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float"})

    for stem in ("p021", "nc"):
        queue(f"mul_mat_vec_{stem}_f16_f32", f"mul_mat_vec_{stem}.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"})

    # Norms
    for op in ("norm", "rms_norm"):
        queue(f"{op}_f32", f"{op}.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"})

    queue("cpy_f32_f32", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float"})
    queue("cpy_f32_f16", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float16_t"})
    queue("cpy_f16_f16", "copy.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"})

    queue("add_f32", "add.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"})

    queue("split_k_reduce", "mul_mat_split_k_reduce.comp", {})

    for op in ("mul", "div"):
        queue(f"{op}_f32", f"{op}.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"})

    for op, source in (("scale", "scale.comp"), ("sqr", "square.comp"), ("clamp", "clamp.comp")):
        queue(f"{op}_f32", source, {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"})

    for op in ("gelu", "silu", "relu", "diag_mask_inf"):
        queue(f"{op}_f32", f"{op}.comp", {"A_TYPE": "float", "D_TYPE": "float"})

    queue("soft_max_f32", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"})
    queue("soft_max_f32_f16", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float16_t", "D_TYPE": "float"})

    for variant in ("rope_norm", "rope_neox"):
        for suffix, glsl_type in (("f32", "float"), ("f16", "float16_t")):
            queue(f"{variant}_{suffix}", f"{variant}.comp", {"A_TYPE": glsl_type, "D_TYPE": glsl_type})

    queue("argsort_f32", "argsort.comp", {"A_TYPE": "float"})

    queue("sum_rows_f32", "sum_rows.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"})

    # Run tasks concurrently guarded by a concurrency limit.
    sem = asyncio.Semaphore(ASYNCIO_CONCURRENCY)

    async def limited(job):
        async with sem:
            return await job

    await asyncio.gather(*(limited(job) for job in tasks))

    bytes_per_row = 12

    with open("ggml-vulkan-shaders.hpp", "w") as header:
        header.write("#include <cstdint>\n\n")
        for name, path in sorted(shader_fnames):
            with open(path, "rb") as spv:
                blob = spv.read()

            header.write(f"unsigned char {name}_data[] = {{\n")
            for start in range(0, len(blob), bytes_per_row):
                row = blob[start:start + bytes_per_row]
                header.write("".join(f"0x{byte:02x}," for byte in row))
                # A line break follows every complete row, including the last one.
                if len(row) == bytes_per_row:
                    header.write("\n")
            header.write("\n};\n")
            header.write(f"const uint64_t {name}_len = {len(blob)};\n\n")

            os.remove(path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator")
|
||||||
|
|
||||||
|
parser.add_argument("--glslc", help="Path to glslc")
|
||||||
|
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
||||||
|
|
||||||
|
if args.glslc:
|
||||||
|
GLSLC = args.glslc
|
||||||
|
|
||||||
|
asyncio.run(main())
|
@ -24,7 +24,7 @@ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, st
|
|||||||
// Graph allocator
|
// Graph allocator
|
||||||
/*
|
/*
|
||||||
Example usage:
|
Example usage:
|
||||||
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
|
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type());
|
||||||
|
|
||||||
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
||||||
ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
||||||
|
@ -1,25 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ggml.h"
|
|
||||||
#include "ggml-backend.h"
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// buffer_type API
|
|
||||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
|
|
||||||
|
|
||||||
GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend);
|
|
||||||
|
|
||||||
// backend API
|
|
||||||
GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void);
|
|
||||||
|
|
||||||
GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);
|
|
||||||
|
|
||||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
@ -3,20 +3,6 @@
|
|||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "ggml-alloc.h"
|
#include "ggml-alloc.h"
|
||||||
|
|
||||||
#ifdef GGML_BACKEND_SHARED
|
|
||||||
# if defined(_WIN32) && !defined(__MINGW32__)
|
|
||||||
# ifdef GGML_BACKEND_BUILD
|
|
||||||
# define GGML_BACKEND_API __declspec(dllexport) extern
|
|
||||||
# else
|
|
||||||
# define GGML_BACKEND_API __declspec(dllimport) extern
|
|
||||||
# endif
|
|
||||||
# else
|
|
||||||
# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
# define GGML_BACKEND_API extern
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
@ -26,52 +12,41 @@ extern "C" {
|
|||||||
typedef struct ggml_backend_event * ggml_backend_event_t;
|
typedef struct ggml_backend_event * ggml_backend_event_t;
|
||||||
typedef struct ggml_backend * ggml_backend_t;
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
typedef void * ggml_backend_graph_plan_t;
|
typedef void * ggml_backend_graph_plan_t;
|
||||||
typedef struct ggml_backend_reg * ggml_backend_reg_t;
|
|
||||||
typedef struct ggml_backend_device * ggml_backend_dev_t;
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Backend buffer type
|
|
||||||
//
|
|
||||||
|
|
||||||
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
|
||||||
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
|
||||||
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
|
||||||
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
|
||||||
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
|
||||||
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend buffer
|
// Backend buffer
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// buffer type
|
||||||
|
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
||||||
|
|
||||||
|
// buffer
|
||||||
enum ggml_backend_buffer_usage {
|
enum ggml_backend_buffer_usage {
|
||||||
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
||||||
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
||||||
GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
||||||
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
||||||
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
||||||
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||||
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||||
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
||||||
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
||||||
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||||
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
|
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
||||||
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
||||||
|
|
||||||
// tensor copy between different backends
|
|
||||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend (stream)
|
// Backend
|
||||||
//
|
//
|
||||||
|
|
||||||
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
||||||
@ -86,10 +61,8 @@ extern "C" {
|
|||||||
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
||||||
// "offset" refers to the offset in tensor->data for setting/getting data
|
GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
||||||
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
|
||||||
|
|
||||||
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||||
|
|
||||||
@ -99,126 +72,64 @@ extern "C" {
|
|||||||
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
|
||||||
// NOTE: will be removed, use device version instead
|
|
||||||
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
||||||
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// tensor copy between different backends
|
||||||
|
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
// asynchronous copy
|
// asynchronous copy
|
||||||
// the copy is performed after all the currently queued operations in backend_src
|
// the copy is performed after all the currently queued operations in backend_src
|
||||||
// backend_dst will wait for the copy to complete before performing other operations
|
// backend_dst will wait for the copy to complete before performing other operations
|
||||||
// automatic fallback to sync copy if async is not supported
|
// automatic fallback to sync copy if async is not supported
|
||||||
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
|
// events
|
||||||
|
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
|
||||||
//
|
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
|
||||||
// Events
|
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
|
||||||
//
|
|
||||||
|
|
||||||
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
|
|
||||||
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
|
|
||||||
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
|
|
||||||
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||||
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
|
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend device
|
// CPU backend
|
||||||
//
|
//
|
||||||
|
|
||||||
enum ggml_backend_dev_type {
|
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||||
// CPU device using system memory
|
|
||||||
GGML_BACKEND_DEVICE_TYPE_CPU,
|
|
||||||
// GPU device using dedicated memory
|
|
||||||
GGML_BACKEND_DEVICE_TYPE_GPU,
|
|
||||||
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
|
|
||||||
GGML_BACKEND_DEVICE_TYPE_ACCEL
|
|
||||||
};
|
|
||||||
|
|
||||||
// functionality supported by the device
|
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
|
||||||
struct ggml_backend_dev_caps {
|
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
||||||
// asynchronous operations
|
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||||
bool async;
|
|
||||||
// pinned host buffer
|
|
||||||
bool host_buffer;
|
|
||||||
// creating buffers from host ptr
|
|
||||||
bool buffer_from_host_ptr;
|
|
||||||
// event synchronization
|
|
||||||
bool events;
|
|
||||||
};
|
|
||||||
|
|
||||||
// all the device properties
|
// Create a backend buffer from an existing pointer
|
||||||
struct ggml_backend_dev_props {
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||||
const char * name;
|
|
||||||
const char * description;
|
|
||||||
size_t memory_free;
|
|
||||||
size_t memory_total;
|
|
||||||
enum ggml_backend_dev_type type;
|
|
||||||
struct ggml_backend_dev_caps caps;
|
|
||||||
};
|
|
||||||
|
|
||||||
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||||
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
|
|
||||||
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
|
|
||||||
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
|
|
||||||
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
|
|
||||||
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
|
|
||||||
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
|
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
|
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
|
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
|
|
||||||
|
|
||||||
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
#ifdef GGML_USE_CPU_HBM
|
||||||
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
|
||||||
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
#endif
|
||||||
|
|
||||||
//
|
|
||||||
// Backend (reg)
|
|
||||||
//
|
|
||||||
|
|
||||||
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
|
|
||||||
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
|
|
||||||
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
|
|
||||||
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
|
|
||||||
|
|
||||||
// Common functions that may be obtained using ggml_backend_reg_get_proc_address
|
|
||||||
|
|
||||||
// Split buffer type for tensor parallelism
|
|
||||||
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
|
|
||||||
// Set the number of threads for the backend
|
|
||||||
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
|
|
||||||
// Get additional buffer types provided by the device (returns a NULL-terminated array)
|
|
||||||
typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend registry
|
// Backend registry
|
||||||
//
|
//
|
||||||
|
|
||||||
// Backend (reg) enumeration
|
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
|
||||||
GGML_API size_t ggml_backend_reg_count(void);
|
|
||||||
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
|
|
||||||
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
|
|
||||||
|
|
||||||
// Device enumeration
|
GGML_API size_t ggml_backend_reg_get_count(void);
|
||||||
GGML_API size_t ggml_backend_dev_count(void);
|
GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
|
||||||
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
|
||||||
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
|
GGML_API const char * ggml_backend_reg_get_name(size_t i);
|
||||||
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
|
||||||
// Direct backend (stream) initialization
|
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
|
||||||
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
|
|
||||||
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
|
|
||||||
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
|
|
||||||
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
|
|
||||||
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
|
|
||||||
GGML_API ggml_backend_t ggml_backend_init_best(void);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend scheduler
|
// Backend scheduler
|
||||||
//
|
//
|
||||||
|
|
||||||
// The backend scheduler allows for multiple backend devices to be used together
|
// The backend scheduler allows for multiple backends to be used together
|
||||||
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
||||||
// The backends are selected based on:
|
// The backends are selected based on:
|
||||||
// - the backend that supports the operation
|
// - the backend that supports the operation
|
||||||
@ -242,26 +153,20 @@ extern "C" {
|
|||||||
ggml_backend_sched_reserve(sched, reserve_graph);
|
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||||
|
|
||||||
// compute
|
// compute
|
||||||
graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
|
graph = build_graph(sched);
|
||||||
for (int i = 0; i < 10; ++i) {
|
ggml_backend_sched_graph_compute(sched, graph);
|
||||||
ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
|
|
||||||
}
|
|
||||||
|
|
||||||
// if there are graph inputs:
|
// if there are graph inputs:
|
||||||
graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
|
ggml_backend_sched_reset(sched);
|
||||||
ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
|
ggml_backend_sched_alloc_graph(sched, graph);
|
||||||
ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
|
ggml_backend_tensor_set(input_tensor, ...);
|
||||||
ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
|
ggml_backend_sched_graph_compute(sched, graph);
|
||||||
ggml_backend_sched_graph_compute(sched, graph); // execute the graph
|
|
||||||
|
|
||||||
// as an alternative to the above it is also possible to assign the inputs to a dedicated context and
|
|
||||||
// allocate them statically via ggml_backend_alloc_ctx_tensors
|
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
struct ggml_backend_sched;
|
||||||
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||||
|
|
||||||
// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
|
|
||||||
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
||||||
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
||||||
//
|
//
|
||||||
@ -270,12 +175,12 @@ extern "C" {
|
|||||||
//
|
//
|
||||||
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||||
|
|
||||||
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
|
// Initialize a backend scheduler
|
||||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
// Initialize backend buffers from a measure graph
|
// Initialize backend buffers from a measure graph
|
||||||
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
|
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||||
|
|
||||||
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
||||||
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
||||||
@ -290,14 +195,12 @@ extern "C" {
|
|||||||
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||||
|
|
||||||
// Allocate and compute graph on the backend scheduler
|
// Allocate and compute graph on the backend scheduler
|
||||||
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
|
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
|
// Reset all assignments and allocators - must be called before changing the node backends
|
||||||
// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
|
|
||||||
// The correct way to use this API is to discard the deallocated tensors and create new ones.
|
|
||||||
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
// Set a callback to be called for each resulting node during graph compute
|
// Set a callback to be called for each resulting node during graph compute
|
||||||
@ -318,7 +221,7 @@ extern "C" {
|
|||||||
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
||||||
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
||||||
|
|
||||||
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||||
|
|
||||||
// Compare the output of two backends
|
// Compare the output of two backends
|
||||||
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
||||||
@ -327,9 +230,6 @@ extern "C" {
|
|||||||
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||||
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
|
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
|
||||||
|
|
||||||
// CPU buffer types are always available
|
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user