mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-07-04 16:30:58 +02:00
Compare commits
8 Commits
Author | SHA1 | Date | |
---|---|---|---|
c7b6988678 | |||
05042a782d | |||
a7dc2aab16 | |||
22d46b7ba4 | |||
c10db6ea28 | |||
1b51fdf170 | |||
adee3f9c1f | |||
4798be1f9a |
92
.github/workflows/build.yml
vendored
92
.github/workflows/build.yml
vendored
@ -15,10 +15,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
- name: Build ${{ matrix.arch }}
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
@ -36,7 +36,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -53,10 +53,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
uses: cross-platform-actions/action@v0.15.0
|
uses: cross-platform-actions/action@v0.24.0
|
||||||
with:
|
with:
|
||||||
operating_system: freebsd
|
operating_system: freebsd
|
||||||
version: '13.2'
|
version: '13.2'
|
||||||
@ -77,10 +77,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
- name: Build ${{ matrix.arch }}
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
@ -105,10 +105,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
- name: Build ${{ matrix.arch }}
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
@ -133,10 +133,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v2
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
- name: Build ${{ matrix.arch }}
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
@ -165,7 +165,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: add oneAPI to apt
|
- name: add oneAPI to apt
|
||||||
shell: bash
|
shell: bash
|
||||||
@ -189,7 +189,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Clone
|
- name: Clone
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: cmake_build
|
id: cmake_build
|
||||||
@ -215,7 +215,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: add oneAPI to apt
|
- name: add oneAPI to apt
|
||||||
shell: bash
|
shell: bash
|
||||||
@ -239,7 +239,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Clone
|
- name: Clone
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: cmake_build
|
id: cmake_build
|
||||||
@ -262,7 +262,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Setup ${{ matrix.sys }}
|
- name: Setup ${{ matrix.sys }}
|
||||||
uses: msys2/setup-msys2@v2
|
uses: msys2/setup-msys2@v2
|
||||||
@ -328,10 +328,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
- name: Fetch SDL2 and set SDL2_DIR
|
- name: Fetch SDL2 and set SDL2_DIR
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
@ -356,14 +356,14 @@ jobs:
|
|||||||
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
- name: Upload dll
|
- name: Upload dll
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.jnaPath }}_whisper.dll
|
name: ${{ matrix.jnaPath }}_whisper.dll
|
||||||
path: build/bin/${{ matrix.build }}/whisper.dll
|
path: build/bin/${{ matrix.build }}/whisper.dll
|
||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
uses: actions/upload-artifact@v1
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whisper-bin-${{ matrix.arch }}
|
name: whisper-bin-${{ matrix.arch }}
|
||||||
path: build/bin/${{ matrix.build }}
|
path: build/bin/${{ matrix.build }}
|
||||||
@ -392,10 +392,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
- name: Fetch OpenBLAS
|
- name: Fetch OpenBLAS
|
||||||
if: matrix.blas == 'ON'
|
if: matrix.blas == 'ON'
|
||||||
@ -453,7 +453,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
||||||
uses: actions/upload-artifact@v1
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whisper-blas${{ matrix.clblast == 'ON' && '-clblast' || ''}}-bin-${{ matrix.arch }}
|
name: whisper-blas${{ matrix.clblast == 'ON' && '-clblast' || ''}}-bin-${{ matrix.arch }}
|
||||||
path: build/bin/${{ matrix.build }}
|
path: build/bin/${{ matrix.build }}
|
||||||
@ -476,14 +476,14 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
- name: Install CUDA Toolkit
|
- name: Install CUDA Toolkit
|
||||||
id: cuda-toolkit
|
id: cuda-toolkit
|
||||||
uses: Jimver/cuda-toolkit@v0.2.11
|
uses: Jimver/cuda-toolkit@v0.2.15
|
||||||
with:
|
with:
|
||||||
cuda: '${{ matrix.cuda-toolkit }}'
|
cuda: '${{ matrix.cuda-toolkit }}'
|
||||||
|
|
||||||
@ -519,7 +519,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
uses: actions/upload-artifact@v1
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
|
name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
|
||||||
path: build/bin/${{ matrix.build }}
|
path: build/bin/${{ matrix.build }}
|
||||||
@ -533,10 +533,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Setup emsdk
|
- name: Setup emsdk
|
||||||
uses: mymindstorm/setup-emsdk@v12
|
uses: mymindstorm/setup-emsdk@v14
|
||||||
|
|
||||||
- name: Verify
|
- name: Verify
|
||||||
run: emcc -v
|
run: emcc -v
|
||||||
@ -555,7 +555,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Configure
|
- name: Configure
|
||||||
run: |
|
run: |
|
||||||
@ -573,24 +573,24 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
path: whisper
|
path: whisper
|
||||||
|
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
repository: ggerganov/ggml
|
repository: ggerganov/ggml
|
||||||
path: ggml
|
path: ggml
|
||||||
|
|
||||||
- name: Install Java
|
- name: Install Java
|
||||||
uses: actions/setup-java@v3
|
uses: actions/setup-java@v4
|
||||||
with:
|
with:
|
||||||
distribution: zulu
|
distribution: zulu
|
||||||
java-version: 17
|
java-version: 21
|
||||||
|
|
||||||
- name: Setup Android SDK
|
- name: Setup Android SDK
|
||||||
uses: android-actions/setup-android@v2
|
uses: android-actions/setup-android@v3
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
@ -608,20 +608,19 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: set up JDK 11
|
- name: set up JDK 11
|
||||||
uses: actions/setup-java@v3
|
uses: actions/setup-java@v4
|
||||||
with:
|
with:
|
||||||
java-version: '11'
|
java-version: '11'
|
||||||
distribution: 'temurin'
|
distribution: 'temurin'
|
||||||
cache: gradle
|
cache: gradle
|
||||||
|
|
||||||
- name: Setup Android SDK
|
- name: Setup Android SDK
|
||||||
uses: android-actions/setup-android@v2
|
uses: android-actions/setup-android@v3
|
||||||
with:
|
with:
|
||||||
api-level: 30
|
cmdline-tools-version: 9.0
|
||||||
build-tools-version: 30.0.3
|
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
@ -633,15 +632,16 @@ jobs:
|
|||||||
needs: [ 'windows' ]
|
needs: [ 'windows' ]
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Install Java
|
- name: Install Java
|
||||||
uses: actions/setup-java@v1
|
uses: actions/setup-java@v4
|
||||||
with:
|
with:
|
||||||
java-version: 17
|
distribution: zulu
|
||||||
|
java-version: 20
|
||||||
|
|
||||||
- name: Download Windows lib
|
- name: Download Windows lib
|
||||||
uses: actions/download-artifact@v3
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: win32-x86-64_whisper.dll
|
name: win32-x86-64_whisper.dll
|
||||||
path: bindings/java/build/generated/resources/main/win32-x86-64
|
path: bindings/java/build/generated/resources/main/win32-x86-64
|
||||||
@ -654,7 +654,7 @@ jobs:
|
|||||||
./gradlew build
|
./gradlew build
|
||||||
|
|
||||||
- name: Upload jar
|
- name: Upload jar
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whispercpp.jar
|
name: whispercpp.jar
|
||||||
path: bindings/java/build/libs/whispercpp-*.jar
|
path: bindings/java/build/libs/whispercpp-*.jar
|
||||||
@ -676,7 +676,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Test quantize
|
- name: Test quantize
|
||||||
run: |
|
run: |
|
||||||
|
@ -3,7 +3,7 @@ cmake_minimum_required (VERSION 3.5)
|
|||||||
# Allow for the creation of solution folders.
|
# Allow for the creation of solution folders.
|
||||||
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
||||||
|
|
||||||
project(whisper.cpp VERSION 1.6.0)
|
project(whisper.cpp VERSION 1.6.2)
|
||||||
set(SOVERSION 1)
|
set(SOVERSION 1)
|
||||||
|
|
||||||
# Add path to modules
|
# Add path to modules
|
||||||
@ -59,6 +59,10 @@ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDA
|
|||||||
|
|
||||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||||
|
|
||||||
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||||
|
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
||||||
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
||||||
option(WHISPER_NO_AVX512 "whisper: disable AVX512" ON)
|
option(WHISPER_NO_AVX512 "whisper: disable AVX512" ON)
|
||||||
@ -125,6 +129,26 @@ else()
|
|||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (WHISPER_FFMPEG)
|
||||||
|
# As of cmake 3.27, there is no official cmake support for FindFFmpeg.
|
||||||
|
# Consequnelty we added a FindFFmpeg.cmake script the cmake subfolder:
|
||||||
|
# whisper.cpp does not need the full ffmpeg libs, just AVFORMAT AVCODEC AVUTIL SWRESAMPLE
|
||||||
|
# libswresample performs highly optimized audio resampling, rematrixing and sample format conversion operations
|
||||||
|
# libavcodec provides a generic encoding/decoding framework and contains multiple decoders and encoders for audio, video and subtitle streams, and several bitstream filters.
|
||||||
|
# libavformat provides a generic framework for multiplexing and demultiplexing (muxing and demuxing) audio, video and subtitle streams.
|
||||||
|
find_package(FFmpeg REQUIRED)
|
||||||
|
if (NOT ${FFMPEG_FOUND})
|
||||||
|
message(FATAL_ERROR "Cannot find ffmpeg libs/headers")
|
||||||
|
endif()
|
||||||
|
message(STATUS "Found ffmpeg libs: ${FFMPEG_LIBRARIES}")
|
||||||
|
message(STATUS "Found ffmpeg headers in: ${FFMPEG_INCLUDE_DIRS}")
|
||||||
|
message(STATUS "ffmpeg definitions: ${FFMPEG_DEFINITIONS}")
|
||||||
|
message(STATUS "Found avformat ${AVFORMAT_VERSION}")
|
||||||
|
include_directories(${FFMPEG_INCLUDE_DIRS})
|
||||||
|
add_compile_definitions(WHISPER_FFMPEG)
|
||||||
|
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${FFMPEG_LIBRARIES})
|
||||||
|
endif()
|
||||||
|
|
||||||
# on APPLE
|
# on APPLE
|
||||||
if (APPLE)
|
if (APPLE)
|
||||||
# include Accelerate framework
|
# include Accelerate framework
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
[](https://opensource.org/licenses/MIT)
|
[](https://opensource.org/licenses/MIT)
|
||||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||||
|
|
||||||
Stable: [v1.6.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.6.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
Stable: [v1.6.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.6.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||||
|
|
||||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||||
|
|
||||||
|
Submodule bindings/ios updated: 5cfcfb0801...a2085436c2
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "whisper.cpp",
|
"name": "whisper.cpp",
|
||||||
"version": "1.6.0",
|
"version": "1.6.2",
|
||||||
"description": "Whisper speech recognition",
|
"description": "Whisper speech recognition",
|
||||||
"main": "whisper.js",
|
"main": "whisper.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
12
bindings/ruby/Rakefile
Normal file
12
bindings/ruby/Rakefile
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
require 'rake/clean'
|
||||||
|
require 'rubygems/package'
|
||||||
|
|
||||||
|
desc 'Build gem'
|
||||||
|
task :package do
|
||||||
|
spec_source = File.read File.join(File.dirname(__FILE__),'whispercpp.gemspec')
|
||||||
|
spec = nil
|
||||||
|
# see: http://gist.github.com/16215
|
||||||
|
Thread.new { spec = eval("#{spec_source}") }.join
|
||||||
|
spec.validate
|
||||||
|
Gem::Package.build(spec)
|
||||||
|
end
|
@ -12,31 +12,63 @@ extern "C" {
|
|||||||
// Backend buffer
|
// Backend buffer
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// buffer type
|
||||||
|
typedef void * ggml_backend_buffer_type_context_t;
|
||||||
|
|
||||||
|
struct ggml_backend_buffer_type_i {
|
||||||
|
const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft);
|
||||||
|
ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
|
size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
|
||||||
|
size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size
|
||||||
|
size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
|
||||||
|
bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
|
||||||
|
// check if tensor data is in host memory
|
||||||
|
// should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
|
||||||
|
bool (*GGML_CALL is_host) (ggml_backend_buffer_type_t buft);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_backend_buffer_type {
|
||||||
|
struct ggml_backend_buffer_type_i iface;
|
||||||
|
ggml_backend_buffer_type_context_t context;
|
||||||
|
};
|
||||||
|
|
||||||
|
// buffer
|
||||||
typedef void * ggml_backend_buffer_context_t;
|
typedef void * ggml_backend_buffer_context_t;
|
||||||
|
|
||||||
struct ggml_backend_buffer_i {
|
struct ggml_backend_buffer_i {
|
||||||
void (*free_buffer) (ggml_backend_buffer_t buffer);
|
const char * (*GGML_CALL get_name) (ggml_backend_buffer_t buffer);
|
||||||
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
|
void (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
|
||||||
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
|
void * (*GGML_CALL get_base) (ggml_backend_buffer_t buffer);
|
||||||
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
|
void (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
|
void (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
void (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
bool (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
|
||||||
|
void (*GGML_CALL clear) (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
|
void (*GGML_CALL reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_backend_buffer {
|
struct ggml_backend_buffer {
|
||||||
struct ggml_backend_buffer_i iface;
|
struct ggml_backend_buffer_i iface;
|
||||||
|
ggml_backend_buffer_type_t buft;
|
||||||
ggml_backend_t backend;
|
|
||||||
ggml_backend_buffer_context_t context;
|
ggml_backend_buffer_context_t context;
|
||||||
|
|
||||||
size_t size;
|
size_t size;
|
||||||
|
enum ggml_backend_buffer_usage usage;
|
||||||
};
|
};
|
||||||
|
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
|
GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
|
||||||
struct ggml_backend * backend,
|
ggml_backend_buffer_type_t buft,
|
||||||
struct ggml_backend_buffer_i iface,
|
struct ggml_backend_buffer_i iface,
|
||||||
ggml_backend_buffer_context_t context,
|
ggml_backend_buffer_context_t context,
|
||||||
size_t size);
|
size_t size);
|
||||||
|
|
||||||
|
// do not use directly, use ggml_backend_tensor_copy instead
|
||||||
|
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// buffer that contains a collection of buffers
|
||||||
|
GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
|
||||||
|
GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
|
||||||
|
GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend
|
// Backend
|
||||||
//
|
//
|
||||||
@ -44,44 +76,66 @@ extern "C" {
|
|||||||
typedef void * ggml_backend_context_t;
|
typedef void * ggml_backend_context_t;
|
||||||
|
|
||||||
struct ggml_backend_i {
|
struct ggml_backend_i {
|
||||||
const char * (*get_name)(ggml_backend_t backend);
|
const char * (*GGML_CALL get_name)(ggml_backend_t backend);
|
||||||
|
|
||||||
void (*free)(ggml_backend_t backend);
|
void (*GGML_CALL free)(ggml_backend_t backend);
|
||||||
|
|
||||||
// buffer allocation
|
// buffer allocation
|
||||||
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
|
ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
|
||||||
|
|
||||||
// get buffer alignment
|
// (optional) asynchronous tensor data access
|
||||||
size_t (*get_alignment)(ggml_backend_t backend);
|
void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
// tensor data access
|
// (optional) complete all pending operations
|
||||||
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
|
void (*GGML_CALL synchronize)(ggml_backend_t backend);
|
||||||
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
||||||
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
||||||
void (*synchronize) (ggml_backend_t backend);
|
|
||||||
|
|
||||||
// (optional) copy tensor between different backends, allow for single-copy tranfers
|
// compute graph with a plan (not used currently)
|
||||||
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
|
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
||||||
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
|
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
|
||||||
// compute graph with a plan
|
// compute graph with a plan
|
||||||
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
// compute graph without a plan (async)
|
||||||
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
|
||||||
// compute graph without a plan
|
|
||||||
bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
||||||
|
|
||||||
// check if the backend supports an operation
|
// check if the backend supports an operation
|
||||||
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
|
||||||
|
// these should be expensive operations with large batch sizes that may benefit from running on this backend
|
||||||
|
// even if the weight has to be copied from the CPU temporarily
|
||||||
|
bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// (optional) event synchronization
|
||||||
|
ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
|
||||||
|
void (*GGML_CALL event_free) (ggml_backend_event_t event);
|
||||||
|
void (*GGML_CALL event_record) (ggml_backend_event_t event);
|
||||||
|
void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
|
||||||
|
void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_backend {
|
struct ggml_backend {
|
||||||
struct ggml_backend_i iface;
|
ggml_guid_t guid;
|
||||||
|
|
||||||
|
struct ggml_backend_i iface;
|
||||||
ggml_backend_context_t context;
|
ggml_backend_context_t context;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ggml_backend_event {
|
||||||
|
ggml_backend_t backend;
|
||||||
|
void * context;
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend registry
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data);
|
||||||
|
|
||||||
|
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -7,69 +7,123 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||||
|
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||||
|
typedef struct ggml_backend_event * ggml_backend_event_t;
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
typedef void * ggml_backend_graph_plan_t;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend buffer
|
// Backend buffer
|
||||||
//
|
//
|
||||||
|
|
||||||
struct ggml_backend_buffer;
|
// buffer type
|
||||||
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
|
||||||
|
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
||||||
|
|
||||||
// backend buffer functions
|
// buffer
|
||||||
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
enum ggml_backend_buffer_usage {
|
||||||
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
||||||
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
||||||
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
};
|
||||||
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
||||||
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
||||||
GGML_API void ggml_backend_buffer_free_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
|
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend
|
// Backend
|
||||||
//
|
//
|
||||||
|
|
||||||
struct ggml_backend;
|
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
||||||
typedef struct ggml_backend * ggml_backend_t;
|
|
||||||
typedef void * ggml_backend_graph_plan_t;
|
|
||||||
|
|
||||||
GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
|
|
||||||
|
|
||||||
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
||||||
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
||||||
|
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
||||||
|
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
|
||||||
|
|
||||||
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
||||||
GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
||||||
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
||||||
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
||||||
|
|
||||||
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||||
|
|
||||||
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
|
||||||
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
|
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
// tensor copy between different backends
|
// tensor copy between different backends
|
||||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// asynchronous copy
|
||||||
|
// the copy is performed after all the currently queued operations in backend_src
|
||||||
|
// backend_dst will wait for the copy to complete before performing other operations
|
||||||
|
// automatic fallback to sync copy if async is not supported
|
||||||
|
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// events
|
||||||
|
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
|
||||||
|
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
|
||||||
|
|
||||||
//
|
//
|
||||||
// CPU backend
|
// CPU backend
|
||||||
//
|
//
|
||||||
|
|
||||||
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||||
|
|
||||||
GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
|
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
|
||||||
GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
|
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
||||||
|
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||||
|
|
||||||
// Create a backend buffer from an existing pointer
|
// Create a backend buffer from an existing pointer
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef GGML_USE_CPU_HBM
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend registry
|
||||||
|
//
|
||||||
|
|
||||||
|
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
|
||||||
|
|
||||||
|
GGML_API size_t ggml_backend_reg_get_count(void);
|
||||||
|
GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
|
||||||
|
GGML_API const char * ggml_backend_reg_get_name(size_t i);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend scheduler
|
// Backend scheduler
|
||||||
@ -83,53 +137,96 @@ extern "C" {
|
|||||||
/*
|
/*
|
||||||
Example usage:
|
Example usage:
|
||||||
|
|
||||||
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends);
|
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
||||||
// sched is initialized with measure allocators and cannot be used until allocated with a measure graph
|
// preferrably to run on the same backend as the buffer
|
||||||
|
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||||
|
|
||||||
// initialize buffers from a measure graph
|
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
|
||||||
measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
|
|
||||||
|
|
||||||
// in build_graph:
|
// initialize buffers from a max size graph (optional)
|
||||||
build_graph(...) {
|
reserve_graph = build_graph(sched, max_batch_size);
|
||||||
// allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
|
|
||||||
alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
|
|
||||||
ggml_allocr_alloc(alloc_cpu, tensor);
|
|
||||||
|
|
||||||
// manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
|
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||||
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||||
ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
|
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
|
||||||
}
|
|
||||||
|
|
||||||
// allocate backend buffers from measure graph
|
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||||
ggml_backend_sched_init_measure(sched, measure_graph);
|
|
||||||
|
|
||||||
// the scheduler is now ready to compute graphs
|
|
||||||
|
|
||||||
// compute
|
// compute
|
||||||
graph = build_graph(sched);
|
graph = build_graph(sched);
|
||||||
ggml_backend_sched_graph_compute(sched, graph);
|
ggml_backend_sched_graph_compute(sched, graph);
|
||||||
|
|
||||||
|
// if there are graph inputs:
|
||||||
|
ggml_backend_sched_reset(sched);
|
||||||
|
ggml_backend_sched_alloc_graph(sched, graph);
|
||||||
|
ggml_backend_tensor_set(input_tensor, ...);
|
||||||
|
ggml_backend_sched_graph_compute(sched, graph);
|
||||||
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct ggml_backend_sched;
|
struct ggml_backend_sched;
|
||||||
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||||
|
|
||||||
// Initialize a backend scheduler
|
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
||||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, int n_backends);
|
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
||||||
|
//
|
||||||
|
// when ask == false, the scheduler is passing the node tensor to the user for observation
|
||||||
|
// if the user returns false, the scheduler will cancel the graph compute
|
||||||
|
//
|
||||||
|
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||||
|
|
||||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
// Initialize a backend scheduler
|
||||||
|
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||||
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
// Initialize backend buffers from a measure graph
|
// Initialize backend buffers from a measure graph
|
||||||
GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||||
|
|
||||||
GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
|
// Get the number of splits of the last graph
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
|
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||||
|
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||||
|
|
||||||
|
// Allocate and compute graph on the backend scheduler
|
||||||
|
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Reset all assignments and allocators - must be called before changing the node backends
|
||||||
|
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Set a callback to be called for each resulting node during graph compute
|
||||||
|
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Utils
|
||||||
|
//
|
||||||
|
|
||||||
|
struct ggml_backend_graph_copy {
|
||||||
|
ggml_backend_buffer_t buffer;
|
||||||
|
struct ggml_context * ctx_allocated;
|
||||||
|
struct ggml_context * ctx_unallocated;
|
||||||
|
struct ggml_cgraph * graph;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Copy a graph to a different backend
|
||||||
|
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
||||||
|
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
||||||
|
|
||||||
|
typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||||
|
|
||||||
|
// Compare the output of two backends
|
||||||
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
||||||
|
|
||||||
|
// Tensor initialization
|
||||||
|
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||||
|
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
|
||||||
// Allocate a graph on the backend scheduler
|
|
||||||
GGML_API void ggml_backend_sched_graph_compute(
|
|
||||||
ggml_backend_sched_t sched,
|
|
||||||
struct ggml_cgraph * graph);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
1853
bindings/ruby/ext/ggml-common.h
Normal file
1853
bindings/ruby/ext/ggml-common.h
Normal file
File diff suppressed because it is too large
Load Diff
43
bindings/ruby/ext/ggml-cuda.h
Normal file
43
bindings/ruby/ext/ggml-cuda.h
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef GGML_USE_HIPBLAS
|
||||||
|
#define GGML_CUDA_NAME "ROCm"
|
||||||
|
#define GGML_CUBLAS_NAME "hipBLAS"
|
||||||
|
#else
|
||||||
|
#define GGML_CUDA_NAME "CUDA"
|
||||||
|
#define GGML_CUBLAS_NAME "cuBLAS"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_CUDA_MAX_DEVICES 16
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// device buffer
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
@ -5,6 +5,7 @@
|
|||||||
// GGML internal header
|
// GGML internal header
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <string.h> // memcpy
|
#include <string.h> // memcpy
|
||||||
@ -18,6 +19,7 @@ extern "C" {
|
|||||||
// fall back to the _Static_assert C11 keyword.
|
// fall back to the _Static_assert C11 keyword.
|
||||||
// if C99 - static_assert is noop
|
// if C99 - static_assert is noop
|
||||||
// ref: https://stackoverflow.com/a/53923785/4039976
|
// ref: https://stackoverflow.com/a/53923785/4039976
|
||||||
|
#ifndef __cplusplus
|
||||||
#ifndef static_assert
|
#ifndef static_assert
|
||||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
|
||||||
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
||||||
@ -25,6 +27,7 @@ extern "C" {
|
|||||||
#define static_assert(cond, msg) struct global_scope_noop_trick
|
#define static_assert(cond, msg) struct global_scope_noop_trick
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
||||||
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
|
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
|
||||||
@ -34,16 +37,17 @@ extern "C" {
|
|||||||
#ifndef __F16C__
|
#ifndef __F16C__
|
||||||
#define __F16C__
|
#define __F16C__
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
|
||||||
|
#if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
|
||||||
#ifndef __SSE3__
|
#ifndef __SSE3__
|
||||||
#define __SSE3__
|
#define __SSE3__
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef __SSSE3__
|
||||||
|
#define __SSSE3__
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef MIN
|
|
||||||
#undef MAX
|
|
||||||
|
|
||||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
|
||||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
||||||
|
|
||||||
// 16-bit float
|
// 16-bit float
|
||||||
// on Arm, we use __fp16
|
// on Arm, we use __fp16
|
||||||
@ -56,14 +60,30 @@ extern "C" {
|
|||||||
//
|
//
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
|
typedef __fp16 ggml_fp16_internal_t;
|
||||||
#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
|
|
||||||
|
|
||||||
#define GGML_FP16_TO_FP32(x) ((float) (x))
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
#define GGML_FP32_TO_FP16(x) (x)
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
|
||||||
|
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
|
||||||
|
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||||
|
ggml_fp16_internal_t tmp;
|
||||||
|
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
||||||
|
return (float)tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||||
|
ggml_fp16_t res;
|
||||||
|
ggml_fp16_internal_t tmp = f;
|
||||||
|
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
typedef uint16_t ggml_fp16_internal_t;
|
||||||
|
|
||||||
#ifdef __wasm_simd128__
|
#ifdef __wasm_simd128__
|
||||||
#include <wasm_simd128.h>
|
#include <wasm_simd128.h>
|
||||||
#else
|
#else
|
||||||
@ -217,8 +237,7 @@ extern float ggml_table_f32_f16[1 << 16];
|
|||||||
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
||||||
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
||||||
// This is also true for POWER9.
|
// This is also true for POWER9.
|
||||||
#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
|
#if !defined(GGML_FP16_TO_FP32)
|
||||||
|
|
||||||
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
||||||
uint16_t s;
|
uint16_t s;
|
||||||
memcpy(&s, &f, sizeof(uint16_t));
|
memcpy(&s, &f, sizeof(uint16_t));
|
||||||
@ -226,19 +245,23 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
||||||
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
#endif
|
||||||
|
|
||||||
|
#if !defined(GGML_FP32_TO_FP16)
|
||||||
|
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define GGML_HASHTABLE_FULL ((size_t)-1)
|
#define GGML_HASHTABLE_FULL ((size_t)-1)
|
||||||
#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
|
#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
|
||||||
|
|
||||||
|
struct ggml_hash_set ggml_hash_set_new(size_t size);
|
||||||
|
|
||||||
bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
|
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
|
||||||
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
// returns GGML_HAHSHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
|
// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
|
||||||
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||||
|
|
||||||
// return index, asserts if table is full
|
// return index, asserts if table is full
|
||||||
|
46
bindings/ruby/ext/ggml-kompute.h
Normal file
46
bindings/ruby/ext/ggml-kompute.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct ggml_vk_device {
|
||||||
|
int index;
|
||||||
|
int type; // same as VkPhysicalDeviceType
|
||||||
|
size_t heapSize;
|
||||||
|
const char * name;
|
||||||
|
const char * vendor;
|
||||||
|
int subgroupSize;
|
||||||
|
uint64_t bufferAlignment;
|
||||||
|
uint64_t maxAlloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
|
||||||
|
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
|
||||||
|
bool ggml_vk_has_vulkan(void);
|
||||||
|
bool ggml_vk_has_device(void);
|
||||||
|
struct ggml_vk_device ggml_vk_current_device(void);
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
//
|
||||||
|
|
||||||
|
// forward declaration
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
|
||||||
|
GGML_API ggml_backend_t ggml_backend_kompute_init(int device);
|
||||||
|
|
||||||
|
GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
66
bindings/ruby/ext/ggml-metal.h
Normal file
66
bindings/ruby/ext/ggml-metal.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// An interface allowing to compute ggml_cgraph with Metal
|
||||||
|
//
|
||||||
|
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
||||||
|
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
|
||||||
|
//
|
||||||
|
// How it works?
|
||||||
|
//
|
||||||
|
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
|
||||||
|
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
|
||||||
|
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
|
||||||
|
//
|
||||||
|
// You only need to make sure that all memory buffers that you used during the graph creation
|
||||||
|
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
|
||||||
|
// used during the graph evaluation to determine the arguments of the compute kernels.
|
||||||
|
//
|
||||||
|
// Synchronization between device and host memory (for example for input and output tensors)
|
||||||
|
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
// max memory buffers that can be mapped to the device
|
||||||
|
#define GGML_METAL_MAX_BUFFERS 64
|
||||||
|
|
||||||
|
struct ggml_tensor;
|
||||||
|
struct ggml_cgraph;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
// user-code should use only these functions
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
||||||
|
|
||||||
|
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
||||||
|
|
||||||
|
// helper to check if the device supports a specific family
|
||||||
|
// ideally, the user code should be doing these checks
|
||||||
|
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||||
|
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
||||||
|
|
||||||
|
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
|
||||||
|
GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
36
bindings/ruby/ext/ggml-opencl.h
Normal file
36
bindings/ruby/ext/ggml-opencl.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_init(void);
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
|
GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
|
GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
|
||||||
|
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||||
|
GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
||||||
|
|
||||||
|
// GGML_API void * ggml_cl_host_malloc(size_t size);
|
||||||
|
// GGML_API void ggml_cl_host_free(void * ptr);
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
||||||
|
|
||||||
|
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
|
||||||
|
// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
|
||||||
|
|
||||||
|
// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
||||||
|
// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
File diff suppressed because it is too large
Load Diff
@ -1,224 +1,133 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "ggml-impl.h"
|
#define GGML_COMMON_DECL_C
|
||||||
|
#include "ggml-common.h"
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
// GGML internal header
|
// GGML internal header
|
||||||
|
|
||||||
#include <stdint.h>
|
#ifdef __cplusplus
|
||||||
#include <stddef.h>
|
extern "C" {
|
||||||
|
|
||||||
#define QK4_0 32
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // delta
|
|
||||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
|
||||||
} block_q4_0;
|
|
||||||
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
|
||||||
|
|
||||||
#define QK4_1 32
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // delta
|
|
||||||
ggml_fp16_t m; // min
|
|
||||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
|
||||||
} block_q4_1;
|
|
||||||
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
|
||||||
|
|
||||||
#define QK5_0 32
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // delta
|
|
||||||
uint8_t qh[4]; // 5-th bit of quants
|
|
||||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
|
||||||
} block_q5_0;
|
|
||||||
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
|
||||||
|
|
||||||
#define QK5_1 32
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // delta
|
|
||||||
ggml_fp16_t m; // min
|
|
||||||
uint8_t qh[4]; // 5-th bit of quants
|
|
||||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
|
||||||
} block_q5_1;
|
|
||||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
|
||||||
|
|
||||||
#define QK8_0 32
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // delta
|
|
||||||
int8_t qs[QK8_0]; // quants
|
|
||||||
} block_q8_0;
|
|
||||||
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
|
||||||
|
|
||||||
#define QK8_1 32
|
|
||||||
typedef struct {
|
|
||||||
float d; // delta
|
|
||||||
float s; // d * sum(qs[i])
|
|
||||||
int8_t qs[QK8_1]; // quants
|
|
||||||
} block_q8_1;
|
|
||||||
static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
|
|
||||||
|
|
||||||
//
|
|
||||||
// Super-block quantization structures
|
|
||||||
//
|
|
||||||
|
|
||||||
// Super-block size
|
|
||||||
#ifdef GGML_QKK_64
|
|
||||||
#define QK_K 64
|
|
||||||
#define K_SCALE_SIZE 4
|
|
||||||
#else
|
|
||||||
#define QK_K 256
|
|
||||||
#define K_SCALE_SIZE 12
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 2-bit quantization
|
|
||||||
// weight is represented as x = a * q + b
|
|
||||||
// 16 blocks of 16 elements each
|
|
||||||
// Effectively 2.5625 bits per weight
|
|
||||||
typedef struct {
|
|
||||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
|
||||||
uint8_t qs[QK_K/4]; // quants
|
|
||||||
ggml_fp16_t d; // super-block scale for quantized scales
|
|
||||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
|
||||||
} block_q2_K;
|
|
||||||
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
|
||||||
|
|
||||||
// 3-bit quantization
|
|
||||||
// weight is represented as x = a * q
|
|
||||||
// 16 blocks of 16 elements each
|
|
||||||
// Effectively 3.4375 bits per weight
|
|
||||||
#ifdef GGML_QKK_64
|
|
||||||
typedef struct {
|
|
||||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
|
||||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
|
||||||
uint8_t scales[2];
|
|
||||||
ggml_fp16_t d; // super-block scale
|
|
||||||
} block_q3_K;
|
|
||||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
|
|
||||||
#else
|
|
||||||
typedef struct {
|
|
||||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
|
||||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
|
||||||
uint8_t scales[12]; // scales, quantized with 6 bits
|
|
||||||
ggml_fp16_t d; // super-block scale
|
|
||||||
} block_q3_K;
|
|
||||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// 4-bit quantization
|
|
||||||
// 8 blocks of 32 elements each
|
|
||||||
// weight is represented as x = a * q + b
|
|
||||||
// Effectively 4.5 bits per weight
|
|
||||||
#ifdef GGML_QKK_64
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d[2]; // super-block scales/mins
|
|
||||||
uint8_t scales[2]; // 4-bit block scales/mins
|
|
||||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
|
||||||
} block_q4_K;
|
|
||||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
|
||||||
#else
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // super-block scale for quantized scales
|
|
||||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
|
||||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
|
||||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
|
||||||
} block_q4_K;
|
|
||||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// 5-bit quantization
|
|
||||||
// 8 blocks of 32 elements each
|
|
||||||
// weight is represented as x = a * q + b
|
|
||||||
// Effectively 5.5 bits per weight
|
|
||||||
#ifdef GGML_QKK_64
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // super-block scale
|
|
||||||
int8_t scales[QK_K/16]; // 8-bit block scales
|
|
||||||
uint8_t qh[QK_K/8]; // quants, high bit
|
|
||||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
|
||||||
} block_q5_K;
|
|
||||||
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
|
||||||
#else
|
|
||||||
typedef struct {
|
|
||||||
ggml_fp16_t d; // super-block scale for quantized scales
|
|
||||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
|
||||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
|
||||||
uint8_t qh[QK_K/8]; // quants, high bit
|
|
||||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
|
||||||
} block_q5_K;
|
|
||||||
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// 6-bit quantization
|
|
||||||
// weight is represented as x = a * q
|
|
||||||
// 16 blocks of 16 elements each
|
|
||||||
// Effectively 6.5625 bits per weight
|
|
||||||
typedef struct {
|
|
||||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
|
||||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
|
||||||
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
|
|
||||||
ggml_fp16_t d; // super-block scale
|
|
||||||
} block_q6_K;
|
|
||||||
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
|
|
||||||
|
|
||||||
// This is only used for intermediate quantization and dot products
|
|
||||||
typedef struct {
|
|
||||||
float d; // delta
|
|
||||||
int8_t qs[QK_K]; // quants
|
|
||||||
int16_t bsums[QK_K/16]; // sum of quants in groups of 16
|
|
||||||
} block_q8_K;
|
|
||||||
static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
|
|
||||||
|
|
||||||
|
|
||||||
// Quantization
|
// Quantization
|
||||||
void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
|
void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
|
void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
|
void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
|
void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
|
void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
|
void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
|
void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
|
void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
|
void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
|
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
|
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
|
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
void quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
|
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
|
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
|
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
|
void quantize_row_iq3_s_reference (const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
|
void quantize_row_iq2_s_reference (const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
|
|
||||||
|
|
||||||
void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
|
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
|
void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
|
void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
|
void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
|
void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
|
void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
// Dequantization
|
// Dequantization
|
||||||
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
|
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
|
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
|
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
|
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
|
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
|
//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
|
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
|
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
|
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
|
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
|
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
|
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
|
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq1_s (const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq1_m (const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq4_nl (const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq4_xs (const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
void dequantize_row_iq3_s (const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
|
|
||||||
// Dot product
|
// Dot product
|
||||||
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
|
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
|
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
|
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
||||||
|
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq1_m (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
|
||||||
|
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
||||||
|
|
||||||
|
void iq2xs_init_impl(enum ggml_type type);
|
||||||
|
void iq2xs_free_impl(enum ggml_type type);
|
||||||
|
void iq3xs_init_impl(int grid_size);
|
||||||
|
void iq3xs_free_impl(int grid_size);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
||||||
void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
||||||
void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
||||||
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
||||||
void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
||||||
|
49
bindings/ruby/ext/ggml-sycl.h
Normal file
49
bindings/ruby/ext/ggml-sycl.h
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
//
|
||||||
|
// MIT license
|
||||||
|
// Copyright (C) 2024 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_SYCL_MAX_DEVICES 48
|
||||||
|
#define GGML_SYCL_NAME "SYCL"
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||||
|
|
||||||
|
// devide buffer
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||||
|
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
||||||
|
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
||||||
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
||||||
|
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||||
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||||
|
|
||||||
|
// TODO: these are temporary
|
||||||
|
// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
|
||||||
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
||||||
|
|
||||||
|
// SYCL doesn't support registering host memory, keep here for reference
|
||||||
|
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||||
|
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
29
bindings/ruby/ext/ggml-vulkan.h
Normal file
29
bindings/ruby/ext/ggml-vulkan.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_VK_NAME "Vulkan"
|
||||||
|
#define GGML_VK_MAX_DEVICES 16
|
||||||
|
|
||||||
|
GGML_API void ggml_vk_instance_init(void);
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
|
||||||
|
GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
|
||||||
|
GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
28
bindings/ruby/whispercpp.gemspec
Normal file
28
bindings/ruby/whispercpp.gemspec
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
Gem::Specification.new do |s|
|
||||||
|
s.name = "whispercpp"
|
||||||
|
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
||||||
|
s.version = '1.3.0'
|
||||||
|
s.date = '2024-05-14'
|
||||||
|
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
|
||||||
|
s.email = 'todd.fisher@gmail.com'
|
||||||
|
s.extra_rdoc_files = ['LICENSE', 'README.md']
|
||||||
|
|
||||||
|
s.files = ["LICENSE", "README.md", "Rakefile", "ext/extconf.rb", "ext/ggml.c", "ext/ruby_whisper.cpp", "ext/whisper.cpp", "ext/dr_wav.h", "ext/ggml.h", "ext/ruby_whisper.h", "ext/whisper.h"]
|
||||||
|
|
||||||
|
#### Load-time details
|
||||||
|
s.require_paths = ['lib','ext']
|
||||||
|
s.summary = %q{Ruby whisper.cpp bindings}
|
||||||
|
s.test_files = ["tests/test_whisper.rb"]
|
||||||
|
|
||||||
|
s.extensions << 'ext/extconf.rb'
|
||||||
|
|
||||||
|
|
||||||
|
#### Documentation and testing.
|
||||||
|
s.homepage = 'https://github.com/ggerganov/whisper.cpp'
|
||||||
|
s.rdoc_options = ['--main', '../../README.md']
|
||||||
|
|
||||||
|
|
||||||
|
s.platform = Gem::Platform::RUBY
|
||||||
|
|
||||||
|
s.licenses = ['MIT']
|
||||||
|
end
|
163
cmake/FindFFmpeg.cmake
Normal file
163
cmake/FindFFmpeg.cmake
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# From
|
||||||
|
# https://github.com/snikulov/cmake-modules/blob/master/FindFFmpeg.cmake
|
||||||
|
#
|
||||||
|
# vim: ts=2 sw=2
|
||||||
|
# - Try to find the required ffmpeg components(default: AVFORMAT, AVUTIL, AVCODEC)
|
||||||
|
#
|
||||||
|
# Once done this will define
|
||||||
|
# FFMPEG_FOUND - System has the all required components.
|
||||||
|
# FFMPEG_INCLUDE_DIRS - Include directory necessary for using the required components headers.
|
||||||
|
# FFMPEG_LIBRARIES - Link these to use the required ffmpeg components.
|
||||||
|
# FFMPEG_DEFINITIONS - Compiler switches required for using the required ffmpeg components.
|
||||||
|
#
|
||||||
|
# For each of the components it will additionally set.
|
||||||
|
# - AVCODEC
|
||||||
|
# - AVDEVICE
|
||||||
|
# - AVFORMAT
|
||||||
|
# - AVFILTER
|
||||||
|
# - AVUTIL
|
||||||
|
# - POSTPROC
|
||||||
|
# - SWSCALE
|
||||||
|
# the following variables will be defined
|
||||||
|
# <component>_FOUND - System has <component>
|
||||||
|
# <component>_INCLUDE_DIRS - Include directory necessary for using the <component> headers
|
||||||
|
# <component>_LIBRARIES - Link these to use <component>
|
||||||
|
# <component>_DEFINITIONS - Compiler switches required for using <component>
|
||||||
|
# <component>_VERSION - The components version
|
||||||
|
#
|
||||||
|
# Copyright (c) 2006, Matthias Kretz, <kretz@kde.org>
|
||||||
|
# Copyright (c) 2008, Alexander Neundorf, <neundorf@kde.org>
|
||||||
|
# Copyright (c) 2011, Michael Jansen, <kde@michael-jansen.biz>
|
||||||
|
#
|
||||||
|
# Redistribution and use is allowed according to the terms of the BSD license.
|
||||||
|
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
|
||||||
|
|
||||||
|
include(FindPackageHandleStandardArgs)
|
||||||
|
|
||||||
|
# The default components were taken from a survey over other FindFFMPEG.cmake files
|
||||||
|
if (NOT FFmpeg_FIND_COMPONENTS)
|
||||||
|
set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL SWRESAMPLE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#
|
||||||
|
### Macro: set_component_found
|
||||||
|
#
|
||||||
|
# Marks the given component as found if both *_LIBRARIES AND *_INCLUDE_DIRS is present.
|
||||||
|
#
|
||||||
|
macro(set_component_found _component )
|
||||||
|
if (${_component}_LIBRARIES AND ${_component}_INCLUDE_DIRS)
|
||||||
|
message(DEBUG " - ${_component} found.")
|
||||||
|
set(${_component}_FOUND TRUE)
|
||||||
|
else ()
|
||||||
|
message(DEBUG " - ${_component} not found.")
|
||||||
|
endif ()
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
#
|
||||||
|
### Macro: find_component
|
||||||
|
#
|
||||||
|
# Checks for the given component by invoking pkgconfig and then looking up the libraries and
|
||||||
|
# include directories.
|
||||||
|
#
|
||||||
|
macro(find_component _component _pkgconfig _library _header)
|
||||||
|
|
||||||
|
if (NOT WIN32)
|
||||||
|
# use pkg-config to get the directories and then use these values
|
||||||
|
# in the FIND_PATH() and FIND_LIBRARY() calls
|
||||||
|
find_package(PkgConfig)
|
||||||
|
if (PKG_CONFIG_FOUND)
|
||||||
|
pkg_check_modules(PC_${_component} ${_pkgconfig})
|
||||||
|
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDEDIR}")
|
||||||
|
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDE_DIRS}")
|
||||||
|
message(STATUS "${PC_${_component}_CFLAGS}")
|
||||||
|
endif ()
|
||||||
|
endif (NOT WIN32)
|
||||||
|
|
||||||
|
|
||||||
|
find_path(${_component}_INCLUDE_DIRS ${_header}
|
||||||
|
HINTS
|
||||||
|
${PC_${_component}_INCLUDEDIR}
|
||||||
|
${PC_${_component}_INCLUDE_DIRS}
|
||||||
|
PATH_SUFFIXES
|
||||||
|
ffmpeg
|
||||||
|
)
|
||||||
|
|
||||||
|
# CMake's default is to search first for shared libraries and then for static libraries.
|
||||||
|
# Todo later: add option to prefer static libs over dynamic:
|
||||||
|
find_library(${_component}_LIBRARIES NAMES ${_library} lib${_library}.a
|
||||||
|
HINTS
|
||||||
|
${PC_${_component}_LIBDIR}
|
||||||
|
${PC_${_component}_LIBRARY_DIRS}
|
||||||
|
)
|
||||||
|
|
||||||
|
set(${_component}_DEFINITIONS ${PC_${_component}_CFLAGS_OTHER} CACHE STRING "The ${_component} CFLAGS.")
|
||||||
|
set(${_component}_VERSION ${PC_${_component}_VERSION} CACHE STRING "The ${_component} version number.")
|
||||||
|
|
||||||
|
set_component_found(${_component})
|
||||||
|
|
||||||
|
mark_as_advanced(
|
||||||
|
${_component}_INCLUDE_DIRS
|
||||||
|
${_component}_LIBRARIES
|
||||||
|
${_component}_DEFINITIONS
|
||||||
|
${_component}_VERSION)
|
||||||
|
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
|
||||||
|
# Check for cached results. If there are skip the costly part.
|
||||||
|
if (NOT FFMPEG_LIBRARIES)
|
||||||
|
|
||||||
|
# Check for all possible component.
|
||||||
|
find_component(AVCODEC libavcodec avcodec libavcodec/avcodec.h)
|
||||||
|
find_component(AVFORMAT libavformat avformat libavformat/avformat.h)
|
||||||
|
find_component(AVDEVICE libavdevice avdevice libavdevice/avdevice.h)
|
||||||
|
#find_component(AVRESAMPLE libavresample avresample libavresample/avresample.h) # old name for swresample
|
||||||
|
find_component(AVUTIL libavutil avutil libavutil/avutil.h)
|
||||||
|
find_component(AVFILTER libavfilter avfilter libavfilter/avfilter.h)
|
||||||
|
find_component(SWSCALE libswscale swscale libswscale/swscale.h)
|
||||||
|
find_component(POSTPROC libpostproc postproc libpostproc/postprocess.h)
|
||||||
|
find_component(SWRESAMPLE libswresample swresample libswresample/swresample.h)
|
||||||
|
|
||||||
|
# Check if the required components were found and add their stuff to the FFMPEG_* vars.
|
||||||
|
foreach (_component ${FFmpeg_FIND_COMPONENTS})
|
||||||
|
if (${_component}_FOUND)
|
||||||
|
# message(STATUS "Required component ${_component} present.")
|
||||||
|
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} ${${_component}_LIBRARIES})
|
||||||
|
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} ${${_component}_DEFINITIONS})
|
||||||
|
list(APPEND FFMPEG_INCLUDE_DIRS ${${_component}_INCLUDE_DIRS})
|
||||||
|
else ()
|
||||||
|
# message(STATUS "Required component ${_component} missing.")
|
||||||
|
endif ()
|
||||||
|
endforeach ()
|
||||||
|
|
||||||
|
# Build the include path with duplicates removed.
|
||||||
|
if (FFMPEG_INCLUDE_DIRS)
|
||||||
|
list(REMOVE_DUPLICATES FFMPEG_INCLUDE_DIRS)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
# cache the vars.
|
||||||
|
set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIRS} CACHE STRING "The FFmpeg include directories." FORCE)
|
||||||
|
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} CACHE STRING "The FFmpeg libraries." FORCE)
|
||||||
|
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} CACHE STRING "The FFmpeg cflags." FORCE)
|
||||||
|
|
||||||
|
mark_as_advanced(FFMPEG_INCLUDE_DIRS
|
||||||
|
FFMPEG_LIBRARIES
|
||||||
|
FFMPEG_DEFINITIONS)
|
||||||
|
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
# Now set the noncached _FOUND vars for the components.
|
||||||
|
# whisper.cpp does not need SWSCALE
|
||||||
|
foreach (_component AVCODEC AVDEVICE AVFORMAT AVRESAMPLE AVUTIL POSTPROCESS)
|
||||||
|
set_component_found(${_component})
|
||||||
|
endforeach ()
|
||||||
|
|
||||||
|
# Compile the list of required vars
|
||||||
|
set(_FFmpeg_REQUIRED_VARS FFMPEG_LIBRARIES FFMPEG_INCLUDE_DIRS)
|
||||||
|
foreach (_component ${FFmpeg_FIND_COMPONENTS})
|
||||||
|
list(APPEND _FFmpeg_REQUIRED_VARS ${_component}_LIBRARIES ${_component}_INCLUDE_DIRS)
|
||||||
|
endforeach ()
|
||||||
|
|
||||||
|
# Give a nice error message if some of the required vars are missing.
|
||||||
|
find_package_handle_standard_args(FFmpeg DEFAULT_MSG ${_FFmpeg_REQUIRED_VARS})
|
||||||
|
|
@ -22,6 +22,10 @@ endif()
|
|||||||
|
|
||||||
set(TARGET common)
|
set(TARGET common)
|
||||||
|
|
||||||
|
if (WHISPER_FFMPEG)
|
||||||
|
set(COMMON_SOURCES_FFMPEG ffmpeg-transcode.cpp)
|
||||||
|
endif()
|
||||||
|
|
||||||
add_library(${TARGET} STATIC
|
add_library(${TARGET} STATIC
|
||||||
common.h
|
common.h
|
||||||
common.cpp
|
common.cpp
|
||||||
@ -29,6 +33,7 @@ add_library(${TARGET} STATIC
|
|||||||
common-ggml.cpp
|
common-ggml.cpp
|
||||||
grammar-parser.h
|
grammar-parser.h
|
||||||
grammar-parser.cpp
|
grammar-parser.cpp
|
||||||
|
${COMMON_SOURCES_FFMPEG}
|
||||||
)
|
)
|
||||||
|
|
||||||
include(DefaultTargetOptions)
|
include(DefaultTargetOptions)
|
||||||
|
@ -12,6 +12,7 @@ const whisperParamsMock = {
|
|||||||
model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
|
model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
|
||||||
fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
|
fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
|
||||||
use_gpu: true,
|
use_gpu: true,
|
||||||
|
flash_attn: false,
|
||||||
no_prints: true,
|
no_prints: true,
|
||||||
comma_in_time: false,
|
comma_in_time: false,
|
||||||
translate: true,
|
translate: true,
|
||||||
|
@ -39,6 +39,7 @@ struct whisper_params {
|
|||||||
bool no_timestamps = false;
|
bool no_timestamps = false;
|
||||||
bool no_prints = false;
|
bool no_prints = false;
|
||||||
bool use_gpu = true;
|
bool use_gpu = true;
|
||||||
|
bool flash_attn = false;
|
||||||
bool comma_in_time = true;
|
bool comma_in_time = true;
|
||||||
|
|
||||||
std::string language = "en";
|
std::string language = "en";
|
||||||
@ -146,6 +147,7 @@ int run(whisper_params ¶ms, std::vector<std::vector<std::string>> &result) {
|
|||||||
|
|
||||||
struct whisper_context_params cparams = whisper_context_default_params();
|
struct whisper_context_params cparams = whisper_context_default_params();
|
||||||
cparams.use_gpu = params.use_gpu;
|
cparams.use_gpu = params.use_gpu;
|
||||||
|
cparams.flash_attn = params.flash_attn;
|
||||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||||
|
|
||||||
if (ctx == nullptr) {
|
if (ctx == nullptr) {
|
||||||
@ -326,6 +328,7 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
|
|||||||
std::string model = whisper_params.Get("model").As<Napi::String>();
|
std::string model = whisper_params.Get("model").As<Napi::String>();
|
||||||
std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
|
std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
|
||||||
bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
|
bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
|
||||||
|
bool flash_attn = whisper_params.Get("flash_attn").As<Napi::Boolean>();
|
||||||
bool no_prints = whisper_params.Get("no_prints").As<Napi::Boolean>();
|
bool no_prints = whisper_params.Get("no_prints").As<Napi::Boolean>();
|
||||||
bool no_timestamps = whisper_params.Get("no_timestamps").As<Napi::Boolean>();
|
bool no_timestamps = whisper_params.Get("no_timestamps").As<Napi::Boolean>();
|
||||||
int32_t audio_ctx = whisper_params.Get("audio_ctx").As<Napi::Number>();
|
int32_t audio_ctx = whisper_params.Get("audio_ctx").As<Napi::Number>();
|
||||||
@ -346,6 +349,7 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
|
|||||||
params.model = model;
|
params.model = model;
|
||||||
params.fname_inp.emplace_back(input);
|
params.fname_inp.emplace_back(input);
|
||||||
params.use_gpu = use_gpu;
|
params.use_gpu = use_gpu;
|
||||||
|
params.flash_attn = flash_attn;
|
||||||
params.no_prints = no_prints;
|
params.no_prints = no_prints;
|
||||||
params.no_timestamps = no_timestamps;
|
params.no_timestamps = no_timestamps;
|
||||||
params.audio_ctx = audio_ctx;
|
params.audio_ctx = audio_ctx;
|
||||||
|
@ -12,6 +12,7 @@ const whisperParams = {
|
|||||||
model: path.join(__dirname, "../../models/ggml-base.en.bin"),
|
model: path.join(__dirname, "../../models/ggml-base.en.bin"),
|
||||||
fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
|
fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
|
||||||
use_gpu: true,
|
use_gpu: true,
|
||||||
|
flash_attn: false,
|
||||||
no_prints: true,
|
no_prints: true,
|
||||||
comma_in_time: false,
|
comma_in_time: false,
|
||||||
translate: true,
|
translate: true,
|
||||||
|
@ -24,6 +24,11 @@
|
|||||||
#include <io.h>
|
#include <io.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef WHISPER_FFMPEG
|
||||||
|
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
|
||||||
|
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Function to check if the next argument exists
|
// Function to check if the next argument exists
|
||||||
std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
|
std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
|
||||||
if (i + 1 < argc && argv[i + 1][0] != '-') {
|
if (i + 1 < argc && argv[i + 1][0] != '-') {
|
||||||
@ -637,7 +642,7 @@ bool is_wav_buffer(const std::string buf) {
|
|||||||
|
|
||||||
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
|
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
|
||||||
drwav wav;
|
drwav wav;
|
||||||
std::vector<uint8_t> wav_data; // used for pipe input from stdin
|
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
|
||||||
|
|
||||||
if (fname == "-") {
|
if (fname == "-") {
|
||||||
{
|
{
|
||||||
@ -670,8 +675,19 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
|
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
|
||||||
|
#if defined(WHISPER_FFMPEG)
|
||||||
|
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
|
||||||
|
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
||||||
|
fprintf(stderr, "error: failed to read wav data as wav \n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#else
|
||||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
|
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
|
||||||
return false;
|
return false;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wav.channels != 1 && wav.channels != 2) {
|
if (wav.channels != 1 && wav.channels != 2) {
|
||||||
|
350
examples/ffmpeg-transcode.cpp
Normal file
350
examples/ffmpeg-transcode.cpp
Normal file
@ -0,0 +1,350 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* transcode.c - convert audio file to WAVE
|
||||||
|
*
|
||||||
|
* Copyright (C) 2019 Andrew Clayton <andrew@digital-domain.net>
|
||||||
|
* Copyright (C) 2024 William Tambellini <william.tambellini@gmail.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Just for conveninent C++ API
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// C
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#include <libavutil/opt.h>
|
||||||
|
#include <libavcodec/avcodec.h>
|
||||||
|
#include <libavformat/avformat.h>
|
||||||
|
#include <libswresample/swresample.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef uint64_t u64;
|
||||||
|
typedef int64_t s64;
|
||||||
|
typedef uint32_t u32;
|
||||||
|
typedef int32_t s32;
|
||||||
|
typedef uint16_t u16;
|
||||||
|
typedef int16_t s16;
|
||||||
|
typedef uint8_t u8;
|
||||||
|
typedef int8_t s8;
|
||||||
|
|
||||||
|
#define WAVE_SAMPLE_RATE 16000
|
||||||
|
#define AVIO_CTX_BUF_SZ 4096
|
||||||
|
|
||||||
|
static const char* ffmpegLog = getenv("FFMPEG_LOG");
|
||||||
|
// Todo: add __FILE__ __LINE__
|
||||||
|
#define LOG(...) \
|
||||||
|
do { if (ffmpegLog) fprintf(stderr, __VA_ARGS__); } while(0) // C99
|
||||||
|
|
||||||
|
/*
|
||||||
|
* WAVE file header based on definition from
|
||||||
|
* https://gist.github.com/Jon-Schneider/8b7c53d27a7a13346a643dac9c19d34f
|
||||||
|
*
|
||||||
|
* We must ensure this structure doesn't have any holes or
|
||||||
|
* padding so we can just map it straight to the WAVE data.
|
||||||
|
*/
|
||||||
|
struct wave_hdr {
|
||||||
|
/* RIFF Header: "RIFF" */
|
||||||
|
char riff_header[4];
|
||||||
|
/* size of audio data + sizeof(struct wave_hdr) - 8 */
|
||||||
|
int wav_size;
|
||||||
|
/* "WAVE" */
|
||||||
|
char wav_header[4];
|
||||||
|
|
||||||
|
/* Format Header */
|
||||||
|
/* "fmt " (includes trailing space) */
|
||||||
|
char fmt_header[4];
|
||||||
|
/* Should be 16 for PCM */
|
||||||
|
int fmt_chunk_size;
|
||||||
|
/* Should be 1 for PCM. 3 for IEEE Float */
|
||||||
|
s16 audio_format;
|
||||||
|
s16 num_channels;
|
||||||
|
int sample_rate;
|
||||||
|
/*
|
||||||
|
* Number of bytes per second
|
||||||
|
* sample_rate * num_channels * bit_depth/8
|
||||||
|
*/
|
||||||
|
int byte_rate;
|
||||||
|
/* num_channels * bytes per sample */
|
||||||
|
s16 sample_alignment;
|
||||||
|
/* bits per sample */
|
||||||
|
s16 bit_depth;
|
||||||
|
|
||||||
|
/* Data Header */
|
||||||
|
/* "data" */
|
||||||
|
char data_header[4];
|
||||||
|
/*
|
||||||
|
* size of audio
|
||||||
|
* number of samples * num_channels * bit_depth/8
|
||||||
|
*/
|
||||||
|
int data_bytes;
|
||||||
|
} __attribute__((__packed__));
|
||||||
|
|
||||||
|
struct audio_buffer {
|
||||||
|
u8 *ptr;
|
||||||
|
int size; /* size left in the buffer */
|
||||||
|
};
|
||||||
|
|
||||||
|
static void set_wave_hdr(wave_hdr& wh, size_t size) {
|
||||||
|
memcpy(&wh.riff_header, "RIFF", 4);
|
||||||
|
wh.wav_size = size + sizeof(struct wave_hdr) - 8;
|
||||||
|
memcpy(&wh.wav_header, "WAVE", 4);
|
||||||
|
memcpy(&wh.fmt_header, "fmt ", 4);
|
||||||
|
wh.fmt_chunk_size = 16;
|
||||||
|
wh.audio_format = 1;
|
||||||
|
wh.num_channels = 1;
|
||||||
|
wh.sample_rate = WAVE_SAMPLE_RATE;
|
||||||
|
wh.sample_alignment = 2;
|
||||||
|
wh.bit_depth = 16;
|
||||||
|
wh.byte_rate = wh.sample_rate * wh.sample_alignment;
|
||||||
|
memcpy(&wh.data_header, "data", 4);
|
||||||
|
wh.data_bytes = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void write_wave_hdr(int fd, size_t size) {
|
||||||
|
struct wave_hdr wh;
|
||||||
|
set_wave_hdr(wh, size);
|
||||||
|
write(fd, &wh, sizeof(struct wave_hdr));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int map_file(int fd, u8 **ptr, size_t *size)
|
||||||
|
{
|
||||||
|
struct stat sb;
|
||||||
|
|
||||||
|
fstat(fd, &sb);
|
||||||
|
*size = sb.st_size;
|
||||||
|
|
||||||
|
*ptr = (u8*)mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
|
||||||
|
if (*ptr == MAP_FAILED) {
|
||||||
|
perror("mmap");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int read_packet(void *opaque, u8 *buf, int buf_size)
|
||||||
|
{
|
||||||
|
struct audio_buffer *audio_buf = (audio_buffer*)opaque;
|
||||||
|
|
||||||
|
buf_size = FFMIN(buf_size, audio_buf->size);
|
||||||
|
|
||||||
|
/* copy internal buffer data to buf */
|
||||||
|
memcpy(buf, audio_buf->ptr, buf_size);
|
||||||
|
audio_buf->ptr += buf_size;
|
||||||
|
audio_buf->size -= buf_size;
|
||||||
|
|
||||||
|
return buf_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void convert_frame(struct SwrContext *swr, AVCodecContext *codec,
|
||||||
|
AVFrame *frame, s16 **data, int *size, bool flush)
|
||||||
|
{
|
||||||
|
int nr_samples;
|
||||||
|
s64 delay;
|
||||||
|
u8 *buffer;
|
||||||
|
|
||||||
|
delay = swr_get_delay(swr, codec->sample_rate);
|
||||||
|
nr_samples = av_rescale_rnd(delay + frame->nb_samples,
|
||||||
|
WAVE_SAMPLE_RATE, codec->sample_rate,
|
||||||
|
AV_ROUND_UP);
|
||||||
|
av_samples_alloc(&buffer, NULL, 1, nr_samples, AV_SAMPLE_FMT_S16, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* !flush is used to check if we are flushing any remaining
|
||||||
|
* conversion buffers...
|
||||||
|
*/
|
||||||
|
nr_samples = swr_convert(swr, &buffer, nr_samples,
|
||||||
|
!flush ? (const u8 **)frame->data : NULL,
|
||||||
|
!flush ? frame->nb_samples : 0);
|
||||||
|
|
||||||
|
*data = (s16*)realloc(*data, (*size + nr_samples) * sizeof(s16));
|
||||||
|
memcpy(*data + *size, buffer, nr_samples * sizeof(s16));
|
||||||
|
*size += nr_samples;
|
||||||
|
av_freep(&buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_audio_stream(const AVStream *stream)
|
||||||
|
{
|
||||||
|
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return non zero on error, 0 on success
|
||||||
|
// audio_buffer: input memory
|
||||||
|
// data: decoded output audio data (wav file)
|
||||||
|
// size: size of output data
|
||||||
|
static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
|
||||||
|
{
|
||||||
|
LOG("decode_audio: input size: %d\n", audio_buf->size);
|
||||||
|
AVFormatContext *fmt_ctx;
|
||||||
|
AVIOContext *avio_ctx;
|
||||||
|
AVStream *stream;
|
||||||
|
AVCodecContext *codec;
|
||||||
|
AVPacket packet;
|
||||||
|
AVFrame *frame;
|
||||||
|
struct SwrContext *swr;
|
||||||
|
u8 *avio_ctx_buffer;
|
||||||
|
unsigned int i;
|
||||||
|
int stream_index = -1;
|
||||||
|
int err;
|
||||||
|
const size_t errbuffsize = 1024;
|
||||||
|
char errbuff[errbuffsize];
|
||||||
|
|
||||||
|
av_register_all(); // from avformat. Still a must-have call for ffmpeg v3! (can be skipped for later versions)
|
||||||
|
|
||||||
|
fmt_ctx = avformat_alloc_context();
|
||||||
|
avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ);
|
||||||
|
LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ);
|
||||||
|
avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, audio_buf, &read_packet, NULL, NULL);
|
||||||
|
fmt_ctx->pb = avio_ctx;
|
||||||
|
|
||||||
|
// open the input stream and read header
|
||||||
|
err = avformat_open_input(&fmt_ctx, NULL, NULL, NULL);
|
||||||
|
if (err) {
|
||||||
|
LOG("Could not read audio buffer: %d: %s\n", err, av_make_error_string(errbuff, errbuffsize, err));
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = avformat_find_stream_info(fmt_ctx, NULL);
|
||||||
|
if (err < 0) {
|
||||||
|
LOG("Could not retrieve stream info from audio buffer: %d\n", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < fmt_ctx->nb_streams; i++) {
|
||||||
|
if (is_audio_stream(fmt_ctx->streams[i])) {
|
||||||
|
stream_index = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stream_index == -1) {
|
||||||
|
LOG("Could not retrieve audio stream from buffer\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
stream = fmt_ctx->streams[stream_index];
|
||||||
|
codec = avcodec_alloc_context3(
|
||||||
|
avcodec_find_decoder(stream->codecpar->codec_id));
|
||||||
|
avcodec_parameters_to_context(codec, stream->codecpar);
|
||||||
|
err = avcodec_open2(codec, avcodec_find_decoder(codec->codec_id),
|
||||||
|
NULL);
|
||||||
|
if (err) {
|
||||||
|
LOG("Failed to open decoder for stream #%d in audio buffer\n", stream_index);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* prepare resampler */
|
||||||
|
swr = swr_alloc();
|
||||||
|
|
||||||
|
av_opt_set_int(swr, "in_channel_count", codec->channels, 0);
|
||||||
|
av_opt_set_int(swr, "out_channel_count", 1, 0);
|
||||||
|
av_opt_set_int(swr, "in_channel_layout", codec->channel_layout, 0);
|
||||||
|
av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0);
|
||||||
|
av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0);
|
||||||
|
av_opt_set_int(swr, "out_sample_rate", WAVE_SAMPLE_RATE, 0);
|
||||||
|
av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0);
|
||||||
|
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
|
||||||
|
|
||||||
|
swr_init(swr);
|
||||||
|
if (!swr_is_initialized(swr)) {
|
||||||
|
LOG("Resampler has not been properly initialized\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
av_init_packet(&packet);
|
||||||
|
frame = av_frame_alloc();
|
||||||
|
if (!frame) {
|
||||||
|
LOG("Error allocating the frame\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* iterate through frames */
|
||||||
|
*data = NULL;
|
||||||
|
*size = 0;
|
||||||
|
while (av_read_frame(fmt_ctx, &packet) >= 0) {
|
||||||
|
avcodec_send_packet(codec, &packet);
|
||||||
|
|
||||||
|
err = avcodec_receive_frame(codec, frame);
|
||||||
|
if (err == AVERROR(EAGAIN))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
convert_frame(swr, codec, frame, data, size, false);
|
||||||
|
}
|
||||||
|
/* Flush any remaining conversion buffers... */
|
||||||
|
convert_frame(swr, codec, frame, data, size, true);
|
||||||
|
|
||||||
|
av_frame_free(&frame);
|
||||||
|
swr_free(&swr);
|
||||||
|
//avio_context_free(); // todo?
|
||||||
|
avcodec_close(codec);
|
||||||
|
avformat_close_input(&fmt_ctx);
|
||||||
|
avformat_free_context(fmt_ctx);
|
||||||
|
|
||||||
|
if (avio_ctx) {
|
||||||
|
av_freep(&avio_ctx->buffer);
|
||||||
|
av_freep(&avio_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// in mem decoding/conversion/resampling:
|
||||||
|
// ifname: input file path
|
||||||
|
// owav_data: in mem wav file. Can be forwarded as it to whisper/drwav
|
||||||
|
// return 0 on success
|
||||||
|
int ffmpeg_decode_audio(const std::string &ifname, std::vector<uint8_t>& owav_data) {
|
||||||
|
LOG("ffmpeg_decode_audio: %s\n", ifname.c_str());
|
||||||
|
int ifd = open(ifname.c_str(), O_RDONLY);
|
||||||
|
if (ifd == -1) {
|
||||||
|
fprintf(stderr, "Couldn't open input file %s\n", ifname.c_str());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
u8 *ibuf = NULL;
|
||||||
|
size_t ibuf_size;
|
||||||
|
int err = map_file(ifd, &ibuf, &ibuf_size);
|
||||||
|
if (err) {
|
||||||
|
LOG("Couldn't map input file %s\n", ifname.c_str());
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
LOG("Mapped input file: %x size: %d\n", ibuf, ibuf_size);
|
||||||
|
struct audio_buffer inaudio_buf;
|
||||||
|
inaudio_buf.ptr = ibuf;
|
||||||
|
inaudio_buf.size = ibuf_size;
|
||||||
|
|
||||||
|
s16 *odata=NULL;
|
||||||
|
int osize=0;
|
||||||
|
|
||||||
|
err = decode_audio(&inaudio_buf, &odata, &osize);
|
||||||
|
LOG("decode_audio returned %d \n", err);
|
||||||
|
if (err != 0) {
|
||||||
|
LOG("decode_audio failed\n");
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
LOG("decode_audio output size: %d\n", osize);
|
||||||
|
|
||||||
|
wave_hdr wh;
|
||||||
|
const size_t outdatasize = osize * sizeof(s16);
|
||||||
|
set_wave_hdr(wh, outdatasize);
|
||||||
|
owav_data.resize(sizeof(wave_hdr) + outdatasize);
|
||||||
|
// header:
|
||||||
|
memcpy(owav_data.data(), &wh, sizeof(wave_hdr));
|
||||||
|
// the data:
|
||||||
|
memcpy(owav_data.data() + sizeof(wave_hdr), odata, osize* sizeof(s16));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -3,4 +3,4 @@ add_executable(${TARGET} main.cpp)
|
|||||||
|
|
||||||
include(DefaultTargetOptions)
|
include(DefaultTargetOptions)
|
||||||
|
|
||||||
target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE common whisper ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
@ -947,7 +947,7 @@ int main(int argc, char ** argv) {
|
|||||||
"application/json");
|
"application/json");
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset params to thier defaults
|
// reset params to their defaults
|
||||||
params = default_params;
|
params = default_params;
|
||||||
});
|
});
|
||||||
svr.Post(sparams.request_path + "/load", [&](const Request &req, Response &res){
|
svr.Post(sparams.request_path + "/load", [&](const Request &req, Response &res){
|
||||||
|
3
samples/.gitignore
vendored
3
samples/.gitignore
vendored
@ -1 +1,4 @@
|
|||||||
*
|
*
|
||||||
|
!jfk.wave
|
||||||
|
!jfk.mp3
|
||||||
|
|
||||||
|
BIN
samples/jfk.mp3
Normal file
BIN
samples/jfk.mp3
Normal file
Binary file not shown.
@ -74,3 +74,14 @@ add_test(NAME ${TEST_TARGET}
|
|||||||
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-large.bin
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-large.bin
|
||||||
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "large")
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "large")
|
||||||
|
|
||||||
|
if (WHISPER_FFMPEG)
|
||||||
|
set(TEST_TARGET test-main-tiny-mp3)
|
||||||
|
# Check with reviewers: any way to check the output transcription via ctest (diff, ...)?
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.en.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.mp3)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;mp3")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
19
whisper.cpp
19
whisper.cpp
@ -818,6 +818,8 @@ struct whisper_state {
|
|||||||
|
|
||||||
whisper_decoder decoders[WHISPER_MAX_DECODERS];
|
whisper_decoder decoders[WHISPER_MAX_DECODERS];
|
||||||
|
|
||||||
|
ggml_backend_t backend = nullptr;
|
||||||
|
|
||||||
// ggml-alloc:
|
// ggml-alloc:
|
||||||
// - stores meta info about the intermediate tensors into the `meta` buffers
|
// - stores meta info about the intermediate tensors into the `meta` buffers
|
||||||
// - stores the actual tensor data into the `data` buffers
|
// - stores the actual tensor data into the `data` buffers
|
||||||
@ -2261,7 +2263,7 @@ static bool whisper_encode_internal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!whisper_encode_external(wstate)) {
|
if (!whisper_encode_external(wstate)) {
|
||||||
if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
|
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -2284,7 +2286,7 @@ static bool whisper_encode_internal(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
|
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2300,7 +2302,7 @@ static bool whisper_encode_internal(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
|
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2801,7 +2803,7 @@ static bool whisper_decode_internal(
|
|||||||
|
|
||||||
logits = gf->nodes[gf->n_nodes - 1];
|
logits = gf->nodes[gf->n_nodes - 1];
|
||||||
|
|
||||||
if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
|
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3248,6 +3250,13 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
|
|||||||
|
|
||||||
whisper_state * state = new whisper_state;
|
whisper_state * state = new whisper_state;
|
||||||
|
|
||||||
|
state->backend = whisper_backend_init(ctx->params);
|
||||||
|
if (!state->backend) {
|
||||||
|
WHISPER_LOG_ERROR("%s: whisper_backend_init() failed\n", __func__);
|
||||||
|
whisper_free_state(state);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
// at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
|
// at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
|
||||||
// in theory, there can be a case where this is not enough, but in practice it should always be enough
|
// in theory, there can be a case where this is not enough, but in practice it should always be enough
|
||||||
const int factor = 3;
|
const int factor = 3;
|
||||||
@ -3684,6 +3693,8 @@ void whisper_free_state(struct whisper_state * state) {
|
|||||||
ggml_gallocr_free(state->alloc_cross.alloc);
|
ggml_gallocr_free(state->alloc_cross.alloc);
|
||||||
ggml_gallocr_free(state->alloc_decode.alloc);
|
ggml_gallocr_free(state->alloc_decode.alloc);
|
||||||
|
|
||||||
|
ggml_backend_free(state->backend);
|
||||||
|
|
||||||
// [EXPERIMENTAL] Token-level timestamps with DTW
|
// [EXPERIMENTAL] Token-level timestamps with DTW
|
||||||
aheads_masks_free(state->aheads_masks);
|
aheads_masks_free(state->aheads_masks);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user