Compare commits

..

539 Commits

Author SHA1 Message Date
05ce7476ae ggml-ci: update input env variables to GG_BUILD_ 2025-03-14 03:14:44 -05:00
f11de0e73c ggml-ci: add run.sh (#2877) 2025-03-14 09:29:55 +02:00
d5cc27ee4d examples : add dl to the list of libraries linked (#2875)
* examples : add dl to the list of libraries linked

This commit adds the dynamic linker library to the list of libraries
linked by the examples.

The motivation for this change is that when building the examples on
ubuntu 20.04, which uses GCC 9.4.0, the dynamic linker requires
explicit linking or the following error is generated:
```console
[ 64%] Linking CXX executable ../../bin/whisper-cli
cd /app/whisper.cpp/build/examples/cli && /usr/bin/cmake -E cmake_link_script CMakeFiles/whisper-cli.dir/link.txt --verbose=1
/usr/bin/c++  -O3 -DNDEBUG   CMakeFiles/whisper-cli.dir/cli.cpp.o  -o ../../bin/whisper-cli  -Wl,-rpath,/app/whisper.cpp/build/src:/app/whisper.cpp/build/ggml/src: ../libcommon.a ../../src/libwhisper.so.1.7.4 -pthread ../../ggml/src/libggml.so ../../ggml/src/libggml-cpu.so ../../ggml/src/libggml-base.so
/usr/bin/ld: ../libcommon.a(common-whisper.cpp.o): undefined reference to symbol 'dlclose@@GLIBC_2.2.5'
/usr/bin/ld: /lib/x86_64-linux-gnu/libdl.so.2: error adding symbols: DSO missing from command line
collect2: error: ld returned 1 exit status
make[2]: *** [examples/cli/CMakeFiles/whisper-cli.dir/build.make:89: bin/whisper-cli] Error 1
make[2]: Leaving directory '/app/whisper.cpp/build'
make[1]: *** [CMakeFiles/Makefile2:433: examples/cli/CMakeFiles/whisper-cli.dir/all] Error 2
make[1]: Leaving directory '/app/whisper.cpp/build'
make: *** [Makefile:130: all] Error 2
```

Resolves: https://github.com/ggerganov/whisper.cpp/issues/2854
2025-03-14 04:42:20 +01:00
5bb1d58c6a whisper: add xcframework build script (#2873)
* whisper: add xcframework build script

* added apple validation scripts

* fixed Readme

* validation script fix
2025-03-13 13:56:39 +01:00
7d14005717 objc : fix build, tmp remove GPU support, use C++17 2025-03-08 15:13:01 +02:00
4ffb8e3e4d cmake : fix ggml-config (ggml/0) 2025-03-08 15:13:01 +02:00
1d8d8ae55e sync : ggml 2025-03-08 15:13:01 +02:00
eebf6bc0bd ggml-cpu: faster AVX2 variant for IQ1_M (llama/12216) 2025-03-08 15:13:01 +02:00
dc8f423b40 metal : simplify kernel arguments using a struct (ggml/3229) (llama/12194)
* metal : refactor im2col parameters into a struct

* metal: Change im2col offset types from int32_t to uint64_t to support larger memory offsets

* metal : refactor sum_rows parameters into a struct

* metal : refactor soft_max parameters into a struct

* metal : refactor diag_mask_inf parameters into a struct

* metal : refactor ssm_conv parameters into a struct

* metal : refactor ssm_scan parameters into a struct

* metal : refactor get_rows parameters into a struct

* metal : refactor group_norm parameters into a struct

* metal : refactor conv_transpose_1d parameters into a struct

* metal : refactor upscale parameters into a struct

* metal : refactor pad parameters into a struct

* metal : refactor pad_reflect_1d parameters into a struct

* metal : refactor arange parameters into a struct

* metal : refactor timestep_embedding parameters into a struct

* metal : refactor argsort parameters into a struct

* metal : refactor leaky_relu parameters into a struct

* metal : refactor pool_2d parameters into a struct

* metal : fix trailing whitespace

---------

Co-authored-by: alexju <alexju@tencent.com>
2025-03-08 15:13:01 +02:00
548e7052f1 metal : fix default.metallib build (llama/12224)
This commit updates the custom command to build the default.metallib
file to use the correct path to ../ggml-common.h by using the variable
METALLIB_COMMON.

The motivation for this change is that currently when building and
specifying GGML_METAL_EMBED_LIBRARY=OFF the following error is
generated:
```console
[ 11%] Linking CXX shared library ../../bin/libggml.dylib
[ 11%] Built target ggml
make[2]: *** No rule to make target `ggml/src/ggml-metal/ggml-common.h', needed by `bin/default.metallib'.  Stop.
make[1]: *** [ggml/src/ggml-metal/CMakeFiles/ggml-metal-lib.dir/all] Error 2
```

With the above change the build could progress but there was a follow
on error about not being able to find the ggml-common.h file in
ggml-metal.metal where is was included as a relative path:
```console
[ 11%] Compiling Metal kernels
/Users/danbev/work/llama.cpp/build/bin/ggml-metal.metal:6:10: error: '../ggml-common.h' file not found, did you mean 'ggml-common.h'?
         ^~~~~~~~~~~~~~~~~~
         "ggml-common.h"
1 error generated.
```
Removing the relative path then allowed the build to complete
successfully.
2025-03-08 15:13:01 +02:00
a34cb73dc2 opencl: Noncontiguous norm, rms_norm, disable fp16 for some ops (llama/12217)
* opencl: support noncontiguous `norm`

* opencl: support noncontiguous `rms_norm`

* opencl: disable fp16 for `ADD`, `MUL`, `SCALE`, `RELU`, `GELU`, `SILU`, `CLAMP`
2025-03-08 15:13:01 +02:00
82f9496657 cmake : fix undefined reference errors for std::filesystem in ggml (#12092) (llama/12094)
Signed-off-by: Ray Lee <hburaylee@gmail.com>
Co-authored-by: Ray Lee <hburaylee@gmail.com>
2025-03-08 15:13:01 +02:00
e3c85e75bd CUDA: fix FA logic for PTX 7.0 and CC >= 7.5 (llama/12222) 2025-03-08 15:13:01 +02:00
b9eab73fa2 HIP/CUDA: set the paramerter value in maintain_cuda_graph instead of replaceing it. (llama/12209)
This avoids conflict with internal cuda/hip runtimes memory managment behavior.
2025-03-08 15:13:01 +02:00
76385c8311 opencl : fix buffer alignment (llama/12197)
Fix the following error:

```
ggml-alloc.c:99: not enough space in the buffer
ggml_tallocr_alloc: not enough space in the buffer to allocate blk.17.ffn_down.weight (needed 27525120, available 27521024)
```

which occurs when `ggml_backend_opencl_context::alignment` is larger
than `cl_ptr_base` (hard-coded to `0x1000`).

Also, fix `ggml_backend_opencl_context::alignment` was set to
`CL_DEVICE_MEM_BASE_ADDR_ALIGN` which was treated as bytes but the
value is reported in bits.
2025-03-08 15:13:01 +02:00
442cd1d2e7 opencl : fix ulong kernel args were set from int variables (llama/12174)
... which left garbage bits in the upper half of the kernel args. This
caused segmentation faults when running PoCL.
2025-03-08 15:13:01 +02:00
bc8cb97e02 opencl : fix profile-related errors (llama/12095)
Co-authored-by: ubuntu <ubuntu@localhost.localdomain>
2025-03-08 15:13:01 +02:00
8dcadf736b ggml-cpu: Faster IQ1 mul_mat_vec on AVX2 using BMI2 instructions (llama/12154)
* ggml-cpu: Faster IQ1 mul_mat_vec on AVX2 using BMI2 instructions

* cmake: Add GGML_BMI2 build option

* ggml: enable BMI2 on relevant CPU variants

* ggml-cpu: include BMI2 in backend score

* ggml-cpu: register BMI2 in ggml_backend_cpu_get_features

* ggml-cpu: add __BMI2__ define when using MSVC
2025-03-08 15:13:01 +02:00
93986b61e0 SYCL: Disable f16 Unary OPs as not supported by the kernels (llama/12201) 2025-03-08 15:13:01 +02:00
bd1a9e34c9 ggml : fix GGMLMetalClass ODR (llama/12200)
-- it might happen if ggml is loaded from 2 separate libraries since each one of them will expose the class. This is more of a guard since we want to use only Metal as embedded library and don't care about the other case.
2025-03-08 15:13:01 +02:00
cc03608e78 ggml : ggml_compute_forward_concat() for arbitrary tensor type (ggml/1118)
* ggml_compute_forward_concat() for arbitrary tensor type

* Check that tensors' type match

* ggml-cpu.c: check type of source tensors

* ggml-cpu.c: move tensor type check to ggml_compute_forward_concat()

* ggml.c: check concatenated tensor type

* Remove tensor type check from ggml_compute_forward_concat() in ggml-cpu.c

..., as it was moved to ggml.c.
2025-03-08 15:13:01 +02:00
54a54faee4 vulkan : sync (llama/0)
ggml-ci
2025-03-08 15:13:01 +02:00
96a92ecc4c ggml : portability fixes for VS 2017 (llama/12150)
* Add include files for std::min/max and std::toupper/tolower

* win32: move _USE_MATH_DEFINES before includes to ensure M_PI is defined

* Use GGML_RESTRICT instead of "restrict" keyword everywhere, and use "__restrict" in MSVC plain C mode

* win32: only use __restrict in MSVC if C11/C17 support is not enabled

---------

Co-authored-by: Marcus Groeber <Marcus.Groeber@cerence.com>
2025-03-08 15:13:01 +02:00
edd1d8686a HIP: implement FlashAttention via rocWMMA for CDNA and RDNA3+ (llama/12032)
Adds GGML_HIP_ROCWMMA_FATTN and rocwmma header check
Adds rocWMMA support to fattn-wmma-f16
2025-03-08 15:13:01 +02:00
dc6f4e7c05 ggml : fix kleidiai build (llama/12159)
The libggml API has changed, but this has not been updated.
2025-03-08 15:13:01 +02:00
74c85d154e SYCL: Move CPY kernels to a separate file and add few missing kernels (llama/12133)
* SYCL: refactor and move cpy kernels to a separate file

* Add few missing cpy kernels

* refactor and add debug logs
2025-03-08 15:13:01 +02:00
eb2d8b6ffd ggml-backend : keep paths in native string type when possible (llama/12144) 2025-03-08 15:13:01 +02:00
b442dcd598 CUDA: compress mode option and default to size (llama/12029)
cuda 12.8 added the option to specify stronger compression for binaries, so we now default to "size".
2025-03-08 15:13:01 +02:00
c98681e6d5 ggml : upgrade init_tensor API to return a ggml_status (llama/11854)
* Upgrade init_tensor API to return a ggml_status

To prepare for an 'abort-free' ggml
(ggml not to abort on OOMs but return a OOM status),
as agreeed with Diego in the ggml repo,
upgrade the init_tensor() and view_init() APIs
to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <slarengh@gmail.com>
2025-03-08 15:13:01 +02:00
3bab804981 vulkan: add specific MMV kernels for IQ2 and IQ3 quants + optimizations (llama/11595)
* vulkan: implement specialized MMV kernels for IQ2 quantizations

* vulkan: add MMV kernels for IQ3 quants

* vulkan: Increase MMV batch size and unroll IQ LUT setup

* vulkan: fix init_iq_shmem for WG sizes larger than tables

* vulkan: common batch size for all I-quants
2025-03-08 15:13:01 +02:00
c927830a70 CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (llama/12098) 2025-03-08 15:13:01 +02:00
992b51b3d5 ggml: aarch64: implement SVE kernels for q2_k_q8_k vector dot (llama/12064)
* Added SVE Support for Q2_K Quantized Models

* Use 4-space indentation in the switch cases

* removed comments lines

* Remove the loop Retain the curly bracess for better understanding of code

* Remove the comment like added for q3_k_q8_k kernel

---------

Co-authored-by: vithulep <p.m.vithule1517@gmail.com>
2025-03-08 15:13:01 +02:00
2c882cbe4c CANN: Fix build error with GCC 13 (llama/11990)
Remove unused header file that causes compilation failure on ARM
platform with GCC 13.
2025-03-08 15:13:01 +02:00
Eve
1fbb119b1e vulkan: matmul dequantization improvements (llama/12015)
* faster dequant for old quants

* dont use unpack for iq4_nl

* vec2 unpack for q8
2025-03-08 15:13:01 +02:00
40dea850fd vulkan: improve im2col (llama/11826)
* vulkan: improve im2col performance
2025-03-08 15:13:01 +02:00
8255a830a8 cmake: Fix ggml backend dependencies and installation (llama/11818)
* Fix dependencies between ggml and backends

ggml backends link only to ggml-base and ggml links to all backends.

* Fix installation of ggml backends

Set up GNUInstallDirs before setting the installation directory of ggml backends
2025-03-08 15:13:01 +02:00
a0f76b2da7 vulkan: fix assertion when qy_needs_dequant (llama/12068)
Looks like a copy/paste bug from qx_needs_dequant.
2025-03-08 15:13:01 +02:00
394768c48b ggml-cpu: Fix build with sve (llama/12059)
* ggml-cpu: Fix build with sve

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* ggml-cpu: Remove unused variable in sve q3_k vec dot

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
2025-03-08 15:13:01 +02:00
846e01b2c0 cuda: unary ops as float + de-duplicate (ggml/1130) 2025-03-08 15:13:01 +02:00
6ac8e6b2ce cuda/vulkan: specify fp32-only support for some operations in supports_op (ggml/1129)
* cuda: restrict SILU_BACK to fp32, since fp16 exceeds the desired test threshold

* vulkan: specify fp32-only support for certain ops (that are now tested for fp16 as well)

* f32 sigmoid in vulkan supports op

* Revert "f32 sigmoid in vulkan supports op"

This reverts commit c6f04b3c19bf4504c2776149c6d8cd84e0b48acb.
2025-03-08 15:13:01 +02:00
60d2ddebdf cuda/cpu: Increase support for fp16 unary operations (ggml/1125)
* Support fp16 unary operations in the CUDA backend

* cpu: increase fp16 support for unary operators in the CPU backend

* cuda: increase fp16 support for unary operators in the CUDA backend

* Add test cases for fp16 unary operators

* metal: update supports_op for unary operators that don't support fp16, to prevent test-backend-ops from failing

* metal: fix PR comments for unary op support after fp16 unary tests
2025-03-08 15:13:01 +02:00
2e180184a8 Told cmake to install ggml-cpp.h as a public header file. (ggml/1126)
It is used by Whisper talk-llama example.

Co-authored-by: Petter Reinholdtsen <pere@debian.org>
2025-03-08 15:13:01 +02:00
ef40950c4a common : more general m_audio_len update logic (#2855)
Co-authored-by: Ivy233 <wangjinrun@uniontech.com>
2025-03-07 10:10:03 +02:00
c774eec709 go : improve model download (#2756)
* Updated models download URL

* Updated list of models available

All of the high efficiency quantized models are rejected when trying to download. They exist on the server. Let's allow them.

* added path prefix for whisper-cli in message to user. The message is misleading if this script is called from another script in a different folder. So the message has to be fixed.

* undid download URL change I made earlier. Fixed filepath.Join(urlPath, model) bug.

* Undid download URL change I made earlier.

Seems that the old URL works but only when provided a model to download. Still doesn't explain why there's a different download URL that also works. Please elucidate in docs.

* Fixed URLForModel Function's bug

filepath.Join is designed for filesystem paths, and it uses backslashes (\) on Windows. URLs, however, require forward slashes (/), so the use of filepath.Join is inappropriate for constructing URLs.

The fmt.Sprintf function ensures that forward slashes are used.

* Fixed URL trailing / double slash bug

Ensure no double slash by trimming trailing '/' from srcUrl if present

* Fixed bad download URL, missing ggml prefix

Not sure if that was a bug I introduced but it was trying to download without the prefix.

* Added question before downloading all models. Added download size estimate

HEAD Requests:
Efficiently fetches file sizes without downloading the content.
Interactive Workflow:
Allows the user to make informed decisions about downloading all models.
Safe Defaults:
Aborts if the user does not explicitly confirm.

* Fixed Unbuffered channel warning.

warning in context.go : misuse of unbuffered os.Signal channel as argument to signal.

The warning indicates that the unbuffered channel used in signal.Notify in context.go may be misused. In Go, unbuffered channels can cause potential deadlocks if signals are sent faster than they are received.

* Fixed download size calculation, download URL prefix bug, added link to models URL for user.

The URL formatter was prepending the model name to the formatted model name in the URL

* Added logs and exes to gitignore

* Delete bindings/go/examples/go-model-download/go-model-download.exe

* Delete whisper_build.log
2025-03-07 10:03:51 +02:00
5b481a27a6 common : fix audio loading by miniaudio (#2862) 2025-03-04 19:05:21 +02:00
fc7b1ee521 fix: missing include common-whisper (#2858) 2025-03-02 20:55:11 +02:00
c42f67e2d2 ruby : follow audio library change (#2851)
* Enable CPU

* Follow audio lib change
2025-02-28 08:09:02 +02:00
339a1cba5d whisper : support GGML_BACKEND_DL (#2843)
* whisper : support GGML_BACKEND_DL

* fix DTW crash

* whisper.objc : fix build - add ggml-cpp.h

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-02-27 13:35:07 +01:00
c64f3e8ada common : separate whisper sources (#2846)
* common : separate whisper sources

* examples : add chrono

* examples : add more headers
2025-02-27 12:50:32 +02:00
9f83f67221 common : fix build min/max (#2845)
* common : try to fix build

* cont : try another fix
2025-02-27 10:39:13 +02:00
7d3da68f79 examples : use miniaudio for direct decoding flac, mp3, ogg and wav (#2759) 2025-02-27 09:06:54 +02:00
b5d21359c1 stream : stop on ^C when no audio is received (#2822)
Add check for ctrl-c in potentially endless loop while calling audio.get()
to receive sound.

Co-authored-by: Petter Reinholdtsen <pere@debian.org>
2025-02-27 08:59:51 +02:00
17addf7104 sync : ggml 2025-02-27 08:55:36 +02:00
cdaee8b4bd Support pure float16 add/sub/mul/div operations in the CUDA (and CPU) backend (ggml/1121)
* Support float16-to-float16 add/sub/mul/div operations in the CUDA backend

* Add fp16 support for add/sub/mul/div on the CPU backend

* Add test cases for fp16 add/sub/mul/div
2025-02-27 08:55:36 +02:00
4b60ff4f92 metal : copy kernels for quant to F32/F16 conversions (llama/12017)
metal: use dequantize_q templates

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-02-27 08:55:36 +02:00
b43b9d928c opencl: fix for small models (llama/11950)
* opencl: fix small shape gemv, remove unused extensions

* opencl: fix `transpose_16`, `dump_tensor`, enforce subgroup size

* opencl: fix for token length < 4

* opencl: use wave size of 64 for all Adreno GPUs

---------

Co-authored-by: Shawn Gu <quic_shawngu@quicinc.com>
Co-authored-by: Skyler Szot <quic_sszot@quicinc.com>
2025-02-27 08:55:36 +02:00
e3cb412a59 Optimize mul_mat for Q4_0 on Intel GPU (llama/12035)
* opt performance by reorder for Intel GPU

* detect hw type and save opt feature, and print opt feature

* correct name

* support optimize graph once when compute graph, record the opt status in tensor->extra, make CI passed

* add env variable GGML_SYCL_DISABLE_OPT for debug

* use syclex::architecture replace the custom hw define, update the guide for GGML_SYCL_DISABLE_OPT

* add performance data

* mv getrows functions to separeted files

* fix global variables

---------

Co-authored-by: arthw <14088817+arthw@users.noreply.github.com>
2025-02-27 08:55:36 +02:00
ac301a7d9b SYCL: Fix GGML_SYCL_DEBUG macro (llama/11995) 2025-02-27 08:55:36 +02:00
82e04e7670 ggml-cpu: Support s390x SIMD Instruction Set (llama/12019)
* ggml: add s390x ARCH_FLAGS for compilation

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add SIMD for s390x using vector intrinsics

SIMD is activated for:
* ggml_vec_dot_f32
* ggml_vec_dot_f16
* ggml_vec_mad_f32
* ggml_vec_mad_f16
* ggml_vec_mad_f32_unroll
* ggml_vec_scale_f32
* ggml_vec_scale_f16

SIMD is NOT activated for:
* ggml_vec_dot_f16_unroll (pending bugfix)

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix missing escape character in GGML_F32x4_REDUCE

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add temporary patch for GGML_F32_ARR and GGML_F16_ARR

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix s390x GGML_F32x4_REDUCE

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: full SIMD activation for F32,F16 s390x

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add option to disable s390x VXE/VXE2

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: change vecintrin.h include to ggml-cpu-impl

* add __VXE__ and __VXE2__ macros

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* cmake: add s390x target detection for VX/VXE/VXE2

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: move s390x vector intrinsics to ggml-cpu-impl.h

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x Q8_0 SIMD

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: correct documentation for Q8_0

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x reduce code complexity Q8_0

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x bugfix typo Q8_0

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activated for Q4_1

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x inline vec_reve

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for Q4_0

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add VXE backend feature

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: remove test.py

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for quantize_row_q8_0

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for quantize_row_q8_1

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for iq4_xs

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: bugfix iq4_xs

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for iq4_nl

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add float, double, and long vector data type

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: clean up iq4_xs SIMD

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix improper use of restrict keyword

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: update warning message for ggml_vec_tbl

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: untested implementation of ggml_vec_dot_iq2_xxs_q8_K

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: update ggml_vec_dot_q4_1_q8_1 to use typedefs

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: switch to restrict for iq4_nl

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: slight dot product speed improvement for q4_1_q8_1

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for q6_K

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add missing `_t` to ggml_int8x16x4_t

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix missing `_t` for ggml_vec_xl_s8x4

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix more missing `_t`

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add unroll and prefetch to Q8_0

increase of 3.86% for prompt processing and 32.22% for token generation

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: patch Q8_0 to use proper vector sizes

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: optimise Q8_0 dot prod compute kernel further

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: add unroll and prefetch to Q4_1

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: refactor Q6_K variable naming for readability

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix Q6_K typos

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for Q5_K

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix wrong char*x16_t naming

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: Q5_K y0 wrong signness

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix Q5_K invalid uchar type

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix Q5_K invalid uchar type

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: s390x SIMD activation for Q4_K

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: fix Q4_K invalid vector intrinsics

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: simplify ggml_padd_s16 compute kernel

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: correct ggml-cpu vxe wording

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: change ggml_aligned_malloc alignment to 256

256 is the cache line size for s390x platforms

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: resolve pr merge via cherry-pick 225bbbf

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml : fix LoongArch compile error with 128-bit SIMD (llama/11701)

* ggml: resolve pr merge via cherry-pick 4571953

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml: cmake remove fork when determining s390x machine type

thank you @ericcurtin

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

---------

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
Co-authored-by: Jinyang He <hejinyang@loongson.cn>
Co-authored-by: junchao-zhao <68935141+junchao-loongson@users.noreply.github.com>
2025-02-27 08:55:36 +02:00
38ac47cd4d CUDA: app option to compile without FlashAttention (llama/12025) 2025-02-27 08:55:36 +02:00
2d70cd36d7 CUDA: optimize FA for GQA + large batches (llama/12014) 2025-02-27 08:55:36 +02:00
98dab49b9a cuda: Add Q5_1, Q5_0, Q4_1 and Q4_0 to F32 conversion support. (llama/12000) 2025-02-27 08:55:36 +02:00
b1385e9aa9 CUDA: correct the lowest Maxwell supported by CUDA 12 (llama/11984)
* CUDA: correct the lowest Maxwell supported by CUDA 12

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2025-02-27 08:55:36 +02:00
48f5e893f5 MUSA: support ARM64 and enable dp4a .etc (llama/11843)
* MUSA:  support ARM64 and enable __dp4a .etc

* fix cross entropy loss op for musa

* update

* add cc info log for musa

* add comment for the MUSA .cc calculation block

---------

Co-authored-by: Bodhi Hu <huaishun.hu@mthreads.com>
2025-02-27 08:55:36 +02:00
dc21871fcb ggml-cpu: Add CPU backend support for KleidiAI library (llama/11390)
* ggml-cpu: Add CPU backend support for KleidiAI library

* Add environmental variable GGML_KLEIDIAI_SME

* Add support for multithread LHS conversion

* Switch kernel selection order to dotprod and i8mm

* updates for review comments

* More updates for review comments

* Reorganize and rename KleidiAI files

* Move ggml-cpu-traits.h to source file

* Update cmake for SME build and add alignment for SME

* Remove append GGML_USE_CPU_KLEIDIAI to the GGML_CDEF_PUBLIC list
2025-02-27 08:55:36 +02:00
64a430bc81 ggml: aarch64: implement SVE kernels for q3_K_q8_K vector dot (llama/11917)
* Added SVE Implementation for Q3_K Kernel in ggml-cpu-quants.c file

* Improved Formating of code in  ggml-cpu-quants.c file

* style : minor fixes

* style : less whitespaces

* style : ptr spaceing

---------

Co-authored-by: vithulep <p.m.vithule1517@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-02-27 08:55:36 +02:00
51a3580c79 CUDA: use async data loading for FlashAttention (llama/11894)
* CUDA: use async data loading for FlashAttention

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2025-02-27 08:55:36 +02:00
37a21dd43d vulkan: implement several ops relevant for ggml_opt (llama/11769)
* vulkan: support memset_tensor

* vulkan: support GGML_OP_SUM

* vulkan: implement GGML_OP_ARGMAX

* vulkan: implement GGML_OP_SUB

* vulkan: implement GGML_OP_COUNT_EQUAL

* vulkan: implement GGML_OP_OPT_STEP_ADAMW

* vulkan: fix check_results RWKV_WKV6 crash and memory leaks

* vulkan: implement GGML_OP_REPEAT_BACK

* tests: remove invalid test-backend-ops REPEAT_BACK tests

* vulkan: fix COUNT_EQUAL memset using a fillBuffer command
2025-02-27 08:55:36 +02:00
8a22a8b17f vulkan: support multi/vision rope, and noncontiguous rope (llama/11902) 2025-02-27 08:55:36 +02:00
fcbcad0c90 metal : fix the crash caused by the lack of residency set support on Intel Macs. (llama/11904) 2025-02-27 08:55:36 +02:00
4444db7360 metal : optimize dequant q6_K kernel (llama/11892) 2025-02-27 08:55:36 +02:00
a7fc1038ca repo : update links to new url (llama/11886)
* repo : update links to new url

ggml-ci

* cont : more urls

ggml-ci
2025-02-27 08:55:36 +02:00
1689aaf854 vulkan: initial support for IQ1_S and IQ1_M quantizations (llama/11528)
* vulkan: initial support for IQ1_S and IQ1_M quantizations

* vulkan: define MMV kernels for IQ1 quantizations

* devops: increase timeout of Vulkan tests again

* vulkan: simplify ifdef for init_iq_shmem
2025-02-27 08:55:36 +02:00
4b48fe449a opencl: Fix rope and softmax (llama/11833)
* opencl: fix `ROPE`

* opencl: fix `SOFT_MAX`

* Add fp16 variant

* opencl: enforce subgroup size for `soft_max`
2025-02-27 08:55:36 +02:00
47cc043e69 cuda : add ampere to the list of default architectures (llama/11870) 2025-02-27 08:55:36 +02:00
e3d9ffb98b ggml: optimize some vec dot functions for LoongArch ASX (llama/11842)
* Optimize ggml_vec_dot_q3_K_q8_K for LoongArch ASX

* Optimize ggml_vec_dot_q4_K_q8_K for LoongArch ASX

* Optimize ggml_vec_dot_q6_K_q8_K for LoongArch ASX

* Optimize ggml_vec_dot_q5_K_q8_K for LoongArch ASX

* Optimize ggml_vec_dot_q2_K_q8_K for LoongArch ASX

* Optimize mul_sum_i8_pairs_float for LoongArch ASX

* Optimize ggml_vec_dot_iq4_xs_q8_K for LoongArch ASX
2025-02-27 08:55:36 +02:00
Eve
e22d69839d vulkan: linux builds + small subgroup size fixes (llama/11767)
* mm subgroup size

* upload vulkan x86 builds
2025-02-27 08:55:36 +02:00
defe731263 llamafile: use member variable instead of constant for iq4nlt (llama/11780) 2025-02-27 08:55:36 +02:00
4e07957bf9 musa: bump MUSA SDK version to rc3.1.1 (llama/11822)
* musa: Update MUSA SDK version to rc3.1.1

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

* musa: Remove workaround in PR #10042

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
2025-02-27 08:55:36 +02:00
d2c5154bb5 ggml-cpu : add chunking support to mul_mat_id (llama/11666)
* ggml-cpu : add chunking support to mul_mat_id

* allocate chunk counter in wdata
parallelize src1 quantization by column to allows parallelization even when there is only one row

* disable for arm

* cleanup

* better way to disable for arm

* fix uninitialized counter when using 1 thread only

* revert test-backend-ops changes
2025-02-27 08:55:36 +02:00
4fac43fe00 ggml : x2 speed for WASM by optimizing SIMD (llama/11453)
* ggml : x2 speed for WASM by optimizing SIMD

* fix bad merging

* rm trailing spaces

* rm redundant clamp

* better quantize_row_q8_K

Co-authored-by: camel-cdr <camel-cdr@protonmail.com>

* remove memset that causes buffer overflow
Co-authored-by: camel-cdr <camel-cdr@protonmail.com>

---------

Co-authored-by: camel-cdr <camel-cdr@protonmail.com>
2025-02-27 08:55:36 +02:00
3be9670f17 HIP: Remove GCN from list of devices that avoid MMQ (llama/11831) 2025-02-27 08:55:36 +02:00
86729fcd6d HIP: Switch to std::vector in rocblas version check (llama/11820) 2025-02-27 08:55:36 +02:00
7fbca6304e cleanup: fix compile warnings associated with gnu_printf (llama/11811) 2025-02-27 08:55:36 +02:00
d597f83e1a ggml : fix multi-threaded clamp_f32 (llama/11824)
* Bug fix for clamp_f32

When using tensors larger than 1d clamp operation does not work due to the restriction of returning if ith is not 0.

* Bug fix for clamp_f32

* Bug fix for clamp_f32
2025-02-27 08:55:36 +02:00
e5edcc6259 ggml-cpu: Fix duplicate MATMUL_INT8 (llama/11817)
Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
2025-02-27 08:55:36 +02:00
556f773d53 CUDA: fix CUDART_VERSION checks (llama/11821) 2025-02-27 08:55:36 +02:00
91d02de332 Fix #11802: Compile bug - RegQueryValueExA changed to RegQueryValueEx (llama/11803)
* Fix #11802: Compile bug - RegQueryValueExA changed to RegQueryValueEx

* Fix #11802: PR #11803 - keep RegQueryValueExA, remove TEXT macro, description needs to be ANSI string
2025-02-27 08:55:36 +02:00
1b67d72f87 CUDA: use arch list for compatibility check (llama/11775)
* CUDA: use arch list for feature availability check

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2025-02-27 08:55:36 +02:00
14d7c0368d fix: typos in documentation files (llama/11791)
* Update ggml.c

* Update arg.cpp

* Update speculative.h
2025-02-27 08:55:36 +02:00
db6e19188a vulkan: Make Vulkan optional at runtime (ggml/11493). (llama/11494)
Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
2025-02-27 08:55:36 +02:00
b4b063a5c9 vulkan: add environment variable GGML_VK_PREFER_HOST_MEMORY to avoid VRAM allocation (llama/11592) 2025-02-27 08:55:36 +02:00
930b739e7a vulkan: account for lookup tables when checking shared memory size (llama/11502) 2025-02-27 08:55:36 +02:00
5981352bb5 ggml: Fix data race in ggml threadpool (llama/11736)
After the barrier in last iteration is executed, still the loop termination
condition will be executed. However main thread can destroy the cgraph object
and its nodes already, then another thread will access it, but the thing is already gone.
Also trouble can happen when n_nodes == 0 or abort is called, but I'm not sure if the
prior situation is possible.

Last syncronization should be done after the loop to ensure the cgraph/cplan won't be
accessed after the main thread exits from the function.
2025-02-27 08:55:36 +02:00
7561da244e CUDA: fix min. version for movmatrix (llama/11751) 2025-02-27 08:55:36 +02:00
be83f342fb vulkan: print shared memory size (llama/11719) 2025-02-27 08:55:36 +02:00
fd369871f7 SYCL: remove XMX info from print devices (llama/11712) 2025-02-27 08:55:36 +02:00
bbd8364f5e ggml : optimize and build warning fix for LoongArch (llama/11709)
* ggml : optimize convert f32<->f16 for loongarch_asx

* ggml : optimize loongarch_asx extend i16,i8,u8 to i32,i16

* ggml : Fix warnings when run cpu CI locally on LoongArch
2025-02-27 08:55:36 +02:00
e4102440ef SYCL: Adjust support condition for norm operators (llama/11674)
SYCL does not support non contiguous tensors for norm operations
2025-02-27 08:55:36 +02:00
f8242ec483 ggml : fix LoongArch compile error with 128-bit SIMD (llama/11701) 2025-02-27 08:55:36 +02:00
ef51b4cba4 vulkan: optimize coopmat2 iq2/iq3 callbacks (llama/11521)
* vulkan: optimize coopmat2 iq2/iq3 callbacks

* build: trigger CI on GLSL compute shader changes
2025-02-27 08:55:36 +02:00
6f08b24146 vulkan: initial support for IQ4_XS quantization (llama/11501) 2025-02-27 08:55:36 +02:00
7c165d7fa8 vulkan: use smaller combined allocations to avoid fragmentation (llama/11551) 2025-02-27 08:55:36 +02:00
2f0cf44915 metal : avoid breaking build when metal API predates TARGET_OS_VISION (llama/11690)
Avoids breakage in nix flake build introduced by b0569130c5e9c671152c913d82803b7c2f014ff9
2025-02-27 08:55:36 +02:00
b9c972fd0d metal : adjust support conditions for norm operators (llama/11671)
cont #11659

ggml-ci
2025-02-27 08:55:36 +02:00
01c9aafbfd CUDA: support for mat. mul. with ne03 != ne13 (llama/11656) 2025-02-27 08:55:36 +02:00
bae6bbf487 CUDA: non-contiguous (RMS) norm support (llama/11659)
* CUDA: non-contiguous (RMS) norm support

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-02-27 08:55:36 +02:00
c310272fa0 HIP: force max threads per block to be 1024 (llama/11621)
Some old/vendor forked version of llvm still use 256. Explicitly set it to 1024 to align with upstream llvm.

Signed-off-by: fxzjshm <fxzjshm@163.com>
2025-02-27 08:55:36 +02:00
bd0b55dbe0 metal : use residency set for other platforms (llama/11648) 2025-02-27 08:55:36 +02:00
ba4645db2c rpc: fix known RCE in rpc-server (ggml/1103)
Add bounds checking in `rpc_server::copy_tensor` to prevent out-of-bounds writes
+ Check if  `(uint8_t *)dst->data + ggml_nbytes(src)` remains within the destination buffer’s allocated region.
2025-02-27 08:55:36 +02:00
dfc6ca62f3 stream : add beam size parameter(#2836)
* feat: Add beam size parameter to stream.cpp for beam search configuration

* feat: Add beam size parameter to whisper full params in stream example

* fix: Remove duplicate beam search size assignment in server.cpp
2025-02-25 11:39:33 +02:00
47e14c0529 whisper : restore big endian support (#2816)
* whisper : fix BYTESWAP whitespace

* whisper : make byteswap useable with C++17

* cmake : define WHISPER_BIG_ENDIAN for big-endian targets

* ci : fix (again) arm64 build fails

* docker : attempt fixing arm64 build on ci

* qemu v7.0.0-28

[imported from
https://github.com/ggml-org/llama.cpp
/commit/818a340ea8be55b3706e1772527cb8738e90a8c7
(#11895)]

---------

Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
2025-02-25 11:38:13 +02:00
d682e15090 Fixes for Windows (#2790)
Fixes for Windows:

* MSVC default to utf-8 without BOM.
* Console output code page changed to utf-8.

---------

Co-authored-by: Judd <foldl@boxvest.com>
2025-02-06 15:37:21 +08:00
46d07b9c85 cmake : fix compile assumptions for power9/etc (#2777)
* Add small comment re: VSX to readme

Co-authored-by: midnight <midnight@example.com>
2025-02-05 14:41:10 +02:00
33ea03f131 authors : update 2025-02-04 13:03:40 +02:00
dbcc669e1a sync : ggml 2025-02-04 13:03:09 +02:00
16245b35e4 cmake: Add ability to pass in GGML_BUILD_NUMBER (ggml/1096)
This makes git as a dependency optional, and is useful in the case where
ggml is built not from git, but from a tarball, or a distribution source
package.

This conditional also affects GGML_BUILD_COMMIT. Nothing seems to be
using it, though, so there doesn't seem much value factor it out, or
even require it.
2025-02-04 13:03:03 +02:00
898c0cb9d1 readme : add maintenance roadmap 2025-02-04 10:50:10 +02:00
eb9e5032c4 ci : add stalebot 2025-02-04 09:30:20 +02:00
cadfc50eab node : add max_len params in node addon (#2760) 2025-02-03 22:49:06 +02:00
3f91832352 talk-llama : sync llama.cpp 2025-02-03 22:42:26 +02:00
cff8868b5f coreml : always convert to "neuralnetwork" (#2770) 2025-02-03 22:36:32 +02:00
90e3c5fc40 ci : more git 2025-02-03 22:00:57 +02:00
e0f4cef867 ci : install git 2025-02-03 22:00:57 +02:00
234460987e ci : use ubuntu-22.04 instead of ubuntu-latest 2025-02-03 22:00:57 +02:00
b8ab126343 cmake : sync cmake scripts 2025-02-03 22:00:57 +02:00
edc5d9267c sync : ggml 2025-02-03 22:00:57 +02:00
344b98a44f scripts : fix sync paths 2025-02-03 22:00:57 +02:00
dbeb7916b8 CUDA: fix Volta FlashAttention logic (llama/11615) 2025-02-03 22:00:57 +02:00
fad2806352 HIP: fix flash_attn_stream_k_fixup warning (llama/11604) 2025-02-03 22:00:57 +02:00
9906792ec3 CUDA/HIP: add support for selectable warp size to mmv (llama/11519)
CUDA/HIP: add support for selectable warp size to mmv
2025-02-03 22:00:57 +02:00
c49ee07ff4 HIP: add GGML_CUDA_CC_IS_* for amd familys as increasing cc archtectures for amd gpus are not supersets of eatch other (llama/11601)
This fixes a bug where RDNA1 gpus other than gfx1010 where not handled correctly
2025-02-03 22:00:57 +02:00
f8a831779e CUDA: use mma PTX instructions for FlashAttention (llama/11583)
* CUDA: use mma PTX instructions for FlashAttention

* __shfl_sync workaround for movmatrix

* add __shfl_sync to HIP

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2025-02-03 22:00:57 +02:00
85451e3612 ci: use sccache on windows instead of ccache (llama/11545)
* Use sccache on ci for windows

* Detect sccache in cmake
2025-02-03 22:00:57 +02:00
43c744ce8b HIP: require at least HIP 5.5 2025-02-03 22:00:57 +02:00
fc2e44490d HIP: Prepare reduction operators for wave 64 2025-02-03 22:00:57 +02:00
f41fdad200 CUDA/HIP: add warp_size to cuda_device_info 2025-02-03 22:00:57 +02:00
80fa576254 vulkan: implement initial support for IQ2 and IQ3 quantizations (llama/11360)
* vulkan: initial support for IQ3_S

* vulkan: initial support for IQ3_XXS

* vulkan: initial support for IQ2_XXS

* vulkan: initial support for IQ2_XS

* vulkan: optimize Q3_K by removing branches

* vulkan: implement dequantize variants for coopmat2

* vulkan: initial support for IQ2_S

* vulkan: vertically realign code

* port failing dequant callbacks from mul_mm

* Fix array length mismatches

* vulkan: avoid using workgroup size before it is referenced

* tests: increase timeout for Vulkan llvmpipe backend

---------

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
2025-02-03 22:00:57 +02:00
75e7d0585e vulkan: Catch pipeline creation failure and print an error message (llama/11436)
* vulkan: Catch pipeline creation failure and print an error message

Also, fix some warnings from my on-demand compile change.

* vulkan: fix pipeline creation logging
2025-02-03 22:00:57 +02:00
682a6f5f87 HIP: Supress transformation warning in softmax.cu
loops with bounds not known at compile time can not be unrolled.
when ncols_template == 0, the bounds of the loop are not constexpr, thus llvm cant unroll the loops here.
2025-02-03 22:00:57 +02:00
115716d109 HIP: Only call rocblas_initialize on rocblas versions with the multiple instantation bug (llama/11080)
This disables the workaround on rocblas fixed versions (>=4.0.0) to eliminate the runtime cost and unnecessary VRAM allocation of loading all tensile objects.
2025-02-03 22:00:57 +02:00
b2cfef655b cmake : don't fail on GGML_CPU=OFF (llama/11457) 2025-02-03 22:00:57 +02:00
22e3df0afa SYCL : SOFTMAX F16 mask support and other fixes (llama/11261)
Implemented ggml_sycl_op_soft_max() F16 src1(mask) support for which a pragma deprecation warning was added during #5021.
To do this, had to decouple it from ggml_sycl_op_flatten which always considered src1 to be of fp32 type(many OP functions are dependent on it).

* SYCL: SOFTMAX F16 mask support and other fixes

* test-backend-ops: Add F16 mask test cases
2025-02-03 22:00:57 +02:00
028511d349 AMD: parse the architecture as supplied by gcnArchName (llama/11244)
The value provided by minor doesn't include stepping for AMD, parse the value returned by gcnArchName instead to retrieve an accurate ID.
2025-02-03 22:00:57 +02:00
70c4038842 metal: Handle null returned from MTLCreateSystemDefaultDevice() (llama/11441)
This fixes segmentation fault error when running tests when no metal
devices are available (for example, when not linked with Core Graphics
framework or otherwise).
2025-02-03 22:00:57 +02:00
8639c003a9 metal : use residency sets (llama/11427)
* metal : use residency sets

ggml-ci

* metal : restore commandBufferWithUnretainedReferences calls [no ci]

* metal : release descriptors

ggml-ci

* metal : check env GGML_METAL_NO_RESIDENCY

ggml-ci

* metal : fix build + clean-up

ggml-ci
2025-02-03 22:00:57 +02:00
d5d831da65 cmake: add ggml find package (llama/11369)
* Add initial ggml cmake package

* Add build numbers to ggml find-package

* Expand variables with GGML_ prefix

* Guard against adding to cache variable twice

* Add git to msys2 workflow

* Handle ggml-cpu-* variants

* Link ggml/ggml-base libraries to their targets

* Replace main-cmake-pkg with simple-cmake-pkg

* Interface features require c_std_90

* Fix typo

* Removed unnecessary bracket from status message

* Update examples/simple-cmake-pkg/README.md

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update examples/simple-cmake-pkg/README.md

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-02-03 22:00:57 +02:00
7230a6e1c8 vulkan: compile shaders on-demand (llama/11406)
Reduce first-run startup time and memory consumption.

Should fix #11339.
2025-02-03 22:00:57 +02:00
a160fa0f3a Hip: disable VMM on hip as it seams that it dosent work in some configurations (llama/11420) 2025-02-03 22:00:57 +02:00
0282ad8fd1 hip : Add hipGraph and VMM support to ROCM (llama/11362)
* Add hipGraph support

* Enable VMM on rocm
2025-02-03 22:00:57 +02:00
9e467815d4 CUDA: fix FP16 cuBLAS GEMM (llama/11396) 2025-02-03 22:00:57 +02:00
727891d9bf rocBLAS: Avoid fp32->fp16->fp32 conversion on cdna (llama/11356) 2025-02-03 22:00:57 +02:00
c262dc80e2 CPU/CUDA: fix (GQA) mul mat back, add CUDA support (llama/11380) 2025-02-03 22:00:57 +02:00
30767b4c4e cmake : avoid -march=native when reproducible build is wanted (llama/11366)
See https://reproducible-builds.org/ for why this is good
and https://reproducible-builds.org/specs/source-date-epoch/
for the definition of this variable.

Without this patch, compiling on different machines produced different binaries, which made verification of results difficult.

Fixes: #11317

This patch was done while working on reproducible builds for openSUSE.
2025-02-03 22:00:57 +02:00
16eeb31933 Vulkan-run-test: fix mmq_wg_denoms (llama/11343)
There should be a copy-and-paste error here.

*mmq_wg_denoms should be used together with *warptile_mmq, instead of
wg_denoms.
2025-02-03 22:00:57 +02:00
ba523d5e22 vulkan: sort shaders for more deterministic binary (llama/11315)
Fixes #11306.
2025-02-03 22:00:57 +02:00
3736706139 vulkan: fix diag_mask_inf (llama/11323)
With robustbufferaccess disabled, this shader was showing OOB stores. There
is a bounds check in the code, but the workgrouop dimensions were reversed vs
CUDA and it was running the wrong number of threads. So fix the workgroup
dimensions and disable robustness for this pipeline.
2025-02-03 22:00:57 +02:00
58640aa456 rpc : better caching of the base buffer pointer (llama/11331)
There is no need to use map, just store the base pointer in the buffer
context.
2025-02-03 22:00:57 +02:00
5183a05e56 metal : fix out-of-bounds write (llama/11314)
ggml-ci
2025-02-03 22:00:57 +02:00
0dcada42d4 vulkan: fix coopmat2 validation failures (llama/11284)
mul mat and flash attention shaders were loading f32 types directly into
A/B matrices, which happens to work but is technically invalid usage.
For FA, we can load it as an Accumulator matrix and convert and this
is not in the inner loop and is cheap enough. For mul mat, it's more
efficient to do this conversion in a separate pass and have the input(s)
be f16.

coopmat2 requires SPIR-V 1.6 (related using to LocalSizeId). LocalSizeId
requires maintenance4 be enabled, and SPIR-V 1.6 requires Vulkan 1.3.
2025-02-03 22:00:57 +02:00
d507b4cebe SYCL: Introducing memory host pool (llama/11251)
* Implement host pool for matrix_info

Creating a new memory pool on the host to store memory location for
matrix_info needed to launch gemm_batch from oneMKL/oneMath.
Removing complex support in gemm_batch since it is not used in llama.cpp

* Remove unnecessary headers and cast

* Reorder member variable to avoid warning on initialization

* Formatting

* Remove unused variable

* Address PR review feedback - remove warning

---------

Signed-off-by: nscipione <nicolo.scipione@codeplay.com>
2025-02-03 22:00:57 +02:00
90171055f3 cmake : add sanitizer flags for llama.cpp (llama/11279)
* cmake : add sanitizer flags for llama.cpp

ggml-ci

* tests : fix compile warnings

ggml-ci

* cmake : move sanitizer flags to llama_add_compile_flags

ggml-ci

* cmake : move llama.cpp compile flags to top level lists

ggml-ci

* cmake : apply only sanitizer flags at top level

ggml-ci

* tests : fix gguf context use in same_tensor_data

* gguf-test: tensor data comparison

* dummy : trigger ggml-ci

* unicode : silence gcc warnings

ggml-ci

* ci : use sanitizer builds only in Debug mode

ggml-ci

* cmake : add status messages [no ci]

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2025-02-03 22:00:57 +02:00
668306ff2b vulkan: fix coopmat2 flash attention for non-contiguous inputs (llama/11281)
Add code similar to mul_mm_cm2 to force alignment of strides, to avoid
a performance regression.

Add noncontiguous FA tests in test-backend-ops.

Fixes #11268.
2025-02-03 22:00:57 +02:00
fdc21fc87b rpc : early register backend devices (llama/11262)
Early register RPC devices and do not propagate RPC specifics in the
llama model structures.

ref: #10609
2025-02-03 22:00:57 +02:00
7183a1eb72 vulkan: support copy from f32 to q4_0/q4_1/q5_0/q5_1/q8_0/iq4_nl (llama/11166)
* vulkan: support copy from f32 to q4_0/q4_1/q5_0/q5_1/q8_0/iq4_nl

Shaders are based on cpy.cu.

* vulkan: support copy from q4_0/q4_1/q5_0/q5_1/q8_0/iq4_nl to f32

* ggml: copy q->f32 assumes some contiguity in the destination
2025-02-03 22:00:57 +02:00
09f3c66648 vulkan: optimize coopmat2 q4_k/q5_k dequant functions. (llama/11206)
Do masking on whole dwords, fetch all scales at once.
2025-02-03 22:00:57 +02:00
62e2414620 vulkan: optimize coopmat2 q2_k dequant function (llama/11130) 2025-02-03 22:00:57 +02:00
de49024e49 CUDA: backwards pass for misc. ops, add tests (llama/11257)
* CUDA: backwards pass for misc. ops, add tests

* remove restrict from pointers
2025-02-03 22:00:57 +02:00
db6383094c ggml: aarch64: implement SVE kernels for q4_K_q8_K vector dot (llama/11227)
* Add SVE support for q4_K_q8_K

* Update ggml/src/ggml-cpu/ggml-cpu-quants.c

change to use K_SCALE_SIZE

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-02-03 22:00:57 +02:00
Eve
164f13c6a9 vulkan: scale caching for k quants + misc fixes (llama/11081)
* q6_k scale caching

* 16 bit unpack

* q4_k test (slow)

* revert it

* q3_k

* q2_k

* little stuff

* try precalculating products of a and q2_k scales

* Revert "try precalculating products of a and q2_k scales"

This reverts commit 65110b81f23f66331a50c6e889a7c1ab9470a86b.

* unpack should be u16, add vim swap to gitignore (about time)

* better q4_k scales

* q5_k

* better q6_k with separate paths for all threads and partial threads in use, plus some more optimizations

* q2_k better dequant

* q3_k optimizations

* q3_k use hmask simd from cpu avx version

* make the caches happy

* q3_k separate out calculation

* q2_k separate out

* little stuff

* use calc_superblock everywhere

* q2_k optimize scale calculation

* more barriers
2025-02-03 22:00:57 +02:00
02aa86230a fix: ggml: fix vulkan-shaders-gen build (llama/10448)
* fix: ggml: fix vulkan-shaders-gen build

The vulkan-shaders-gen target was not being built correctly
in case of cross-compilation.
Other outputs need to be built for the cross compile target,
but vulkan-shaders-gen needs to be built for the host.

* refactor: ggml: Improve vulkan-shaders-gen toolchain setup

- Add GGML_SHADERS_GEN_TOOLCHAIN CMake option.
- Auto-detect host toolchain if not set.

* refactor: ggml: Improve vulkan-shaders-gen toolchain setup

Use configure_file to generate host_toolchain.cmake from template

* fix: ggml: Fix compile error

Fix compile error not finding vulkan-shaders-gen

* fix: vulkan-shaders-gen build and path handling

Fix build issues with vulkan-shaders-gen:
- Add target dependency for correct build order
- Use CMAKE_HOST_SYSTEM_NAME for executable suffix
- Fix MSVC output directory in host toolchain
- Normalize path handling for cross-compilation

* fix: improve host compiler detection in vulkan shader build

Improve host compiler detection for vulkan shader generation:
- Add NO_CMAKE_FIND_ROOT_PATH to all compiler searches
- Consolidate compiler detection logic
- Fix Windows-specific MSVC detection
- Ensure correct compiler search in cross-compilation

* refactor: Simplify CMake function for detecting host compiler

Simplified the CMake function to improve the process of detecting the host compiler.

* fix: Remove unnecessary Vulkan library linkage in CMakeLists.txt

Since `vulkan-shader-gen.cpp` only requires the `glslc` executable
and not the Vulkan headers or libraries, CMakeLists.txt needs to
be corrected.
(See: ecc93d0558fc3ecb8a5af69d2ece02fae4710ade)

* refactor: Rename host_toolchain.cmake.in

- Rename host_toolchain.cmake.in to cmake/host-toolchain.cmake.in

* refactor: GGML_VULKAN_SHADERS_GEN_TOOLCHAIN

Rename the macro GGML_SHADERS_GEN_TOOLCHAIN to GGML_VULKAN_SHADERS_GEN_TOOLCHAIN
2025-02-03 22:00:57 +02:00
54a2ee648f RoPE: fix back, CUDA support for back + noncont. (llama/11240)
* RoPE: fix back, CUDA support for back + noncont.

* fix comments reg. non-cont. RoPE support [no-ci]
2025-02-03 22:00:57 +02:00
9700cfb0a3 SYCL: Add gated linear attention kernel (llama/11175)
* SYCL: Add Gated Linear attention kernel

* glahpp: add a space at the end of file

* gla: Put the barrier inside the main logic loop
2025-02-03 22:00:57 +02:00
8e0143e205 ggml : add option to not print stack on abort (ggml/1081)
* Add option to not print stack on abort

Add option/envvar to disable stack printing on abort.
Also link some unittests with Threads to fix link errors on
ubuntu/g++11.

* Update ggml/src/ggml.c

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2025-02-03 22:00:57 +02:00
f12559d590 ggml-cpu : fix ggml_graph_compute_thread did not terminate on abort. (ggml/1065)
some threads kept looping and failed to terminate properly after an abort during CPU execution.

Co-authored-by: issi <issi@gmail.com>
2025-02-03 22:00:57 +02:00
589b40810a ci : dummy commit to trigger CI 2025-02-03 16:32:48 +02:00
7ffcd05267 ruby : Make context accept initial parameters, API to retrieve a segment and more (#2749)
* Fix type signature for Whisper.log_set

* Use cache file for model when offline

* Extract ruby_whisper_transcribe() into a file

* Extract Whisper::Error

* Use FileList for ext/*.{c,cpp,h}

* Extract Whisper::Segment

* Extract Whisper::Model

* Extract Whisper::Params

* Extract Whisper::Context

* Extract log_callback function

* Write base code in C rather than C++

* Use chdir instead of Dir.chdir in Rakefile

* Define alloc func for Whisper::Model

* Define Whisper::Params' calback and user data reader

* Add test for Whisper::Params.new with keyword arguments

* Make Whisper::Params.new accept keyword arguments

* Update type signatures

* Update README

* Update CLEAN targets

* Fix document comment for Whisper::Params#new_segment_callback=

* Use macro to define params

* Fix dependency of build task

* Set Whisper.finalize_log_callback visibility to private

* Make Whisper::Context#full and full_parallel return self

* Add test for Whisper::Context#full_get_segment

* Add Whisper::Context#full_get_segment

* Update signatures

* Update README

* Fix signature

* Resplace #initialize with .new in signature file [skip ci]

* Fix potential overflow
2025-01-21 09:39:54 +02:00
7a423f1c00 whisper.objc : fix build and CI 2025-01-18 12:06:06 +02:00
99b011a9f5 talk-llama : sync llama.cpp 2025-01-14 10:38:01 +02:00
19d95f9f9a sync : ggml 2025-01-14 10:38:01 +02:00
d5ef1737d8 GGUF: C++ refactor, backend support, misc fixes (skip) (llama/11030)
ggml-ci
2025-01-14 10:38:01 +02:00
1deb41f0e7 ggml : add opencl backend (skip) (llama/10693)
---------

Co-authored-by: Skyler Szot <quic_sszot@quicinc.com>
Co-authored-by: Shangqing Gu <quic_shawngu@quicinc.com>
Co-authored-by: Alexander Angus <quic_aangus@quicinc.com>
Co-authored-by: Hongqiang Wang <quic_wangh@quicinc.com>
Co-authored-by: Max Krasnyansky <quic_maxk@quicinc.com>
2025-01-14 10:38:01 +02:00
2425caf4fd cuda : CUDA Graph Compute Function Refactor (precursor for performance improvements) (llama/11042)
* Refactor: Moves cuda graph executable update step to separate function.

* Refactor: Moves cuda graph update check to separate function.

* Refactor: Moves cuda graph maintenance (update or adjusting copy parameters) to separate function for improved readability.

* Fix: Adds missing reference to maintain_cuda_graph() definition.

* Refactor: Improves structure and abstractions by moving CUDA graph evaluation and capture to its own function.

* Refactor: Moves node graph checks and copy ops into individual function for improved readability.

* Refactor: Removes code permanently excluded from compilation to increase readability.

* Style: Adds missing newline

* Style: Consolidates several neighboring '#ifdef USE_CUDA_GRAPH' into a single one

* Refactor: Makes 'cuda_graph_update_required' a local variable

* remove double lines between functions

---------

Co-authored-by: slaren <slarengh@gmail.com>
2025-01-14 10:38:01 +02:00
a4b00bcaaf ggml : do not define GGML_USE_CUDA when building with GGML_BACKEND_DL (llama/11211)
Build fails when using HIP and GGML_BACKEND_DL:
```
/usr/bin/ld: ../ggml/src/libggml.so: undefined reference to `ggml_backend_cuda_reg'
collect2: error: ld returned 1 exit status
```
This patch fixes this.
2025-01-14 10:38:01 +02:00
cdb8aa2f2e Vulkan: Fix float16 use on devices without float16 support + fix subgroup_size_control validation error (llama/11161)
* Vulkan: Remove float16 use in shaders

* Fix validation error about subgroup_size_control extension
2025-01-14 10:38:01 +02:00
06209f6683 llama: add support for QRWKV6 model architecture (llama/11001)
llama: add support for QRWKV6 model architecture (llama/11001)

* WIP: Add support for RWKV6Qwen2

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* RWKV: Some graph simplification

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Add support for RWKV6Qwen2 with cpu and cuda GLA

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* RWKV6[QWEN2]: Concat lerp weights together to reduce cpu overhead

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Fix some typos

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* code format changes

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Fix wkv test & add gla test

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Fix cuda warning

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Update README.md

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Update ggml/src/ggml-cuda/gla.cu

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Fix fused lerp weights loading with RWKV6

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* better sanity check skipping for QRWKV6 in llama-quant

thanks @compilade

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Co-authored-by: compilade <git@compilade.net>

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: compilade <git@compilade.net>
2025-01-14 10:38:01 +02:00
c3235bd81e SYCL: Refactor ggml_sycl_compute_forward (llama/11121)
* SYCL: refactor ggml_sycl_compute_forward

* SYCL: add back GGML_USED(dst) to ggml_sycl_cpy

* SYCL: add function name to noop debug

* SYCL: Some device info print refactoring and add details of XMX availability
2025-01-14 10:38:01 +02:00
262d0abc87 fix: add missing msg in static_assert (llama/11143)
Signed-off-by: hydai <z54981220@gmail.com>
2025-01-14 10:38:01 +02:00
124eec1664 llamafile : ppc64le MMA INT8 implementation (llama/10912)
This change upstreams llamafile's cpu matrix
multiplication kernels for ppc64le using MMA
builtins for quantised int8 datatype.

This change results in 10% - 70% improvement
in total speed(ie all tokens/total time), across
various batch sizes.

The patch is tested with Meta-Lllama-3-8B,
Mistral-7B, Llama-2-7B-chat-hf models on a
IBM POWER10 machine.

Signed-off-by: Amrita H S <amritahs@linux.vnet.ibm.com>
2025-01-14 10:38:01 +02:00
b08c3a88c8 Disable GL_KHR_cooperative_matrix Vulkan extension if not available. (llama/11117)
* Disable GL_KHR_cooperative_matrix Vulkan extension if not available.

* Perform Vulkan extensions checks in a more sensible order

* Remove unnecessary #ifdef directive
2025-01-14 10:38:01 +02:00
0afce25a69 fix: Vulkan shader gen binary path when Cross-compiling (llama/11096)
* fix: Vulkan shader gen binary path when cross compiling
2025-01-14 10:38:01 +02:00
acdbe58631 GGUF: C++ refactor, backend support, misc fixes (llama/11030)
* GGUF: C++ refactor, backend support, misc fixes

remove ggml_tensor.backend

update CODEOWNERS [no ci]

remove gguf_get_data from API

revise GGUF API data types
2025-01-14 10:38:01 +02:00
09fabffdf5 ggml-backend : only offload from host buffers (fix) (llama/11124) 2025-01-14 10:38:01 +02:00
3988d6396b ggml-backend : only offload from host buffers (llama/11120) 2025-01-14 10:38:01 +02:00
c8c63eeec0 rpc : code cleanup (llama/11107)
Remove duplicated macros, use GGML_LOG_ERROR for errors
2025-01-14 10:38:01 +02:00
abf7f24410 SYCL: Use get_multi_ptr instead of deprecated get_pointer in wkv6 (llama/11087)
* SYCL: Use get_multi_ptr instead of deprecated get_pointer in wkv6

* Revert "SYCL: Use get_multi_ptr instead of deprecated get_pointer in wkv6"

This reverts commit f62dc45f318e48d375e7734b34cbddee81deed52.

* Reland: Use get_multi_ptr instead of deprecated get_pointer in wkv6
2025-01-14 10:38:01 +02:00
341f5c28e6 CUDA: add BF16 support (llama/11093)
* CUDA: add BF16 support
2025-01-14 10:38:01 +02:00
5377099524 Vulkan: Add device-specific blacklist for coopmat for the AMD proprietary driver (llama/11074)
* Vulkan: Add device-specific blacklist for coopmat for the AMD proprietary driver

* Add (TM) to AMD name check
2025-01-14 10:38:01 +02:00
dcbb375779 Support for models with non-512-aligned tensors over RPC. (llama/11047)
* Added init tensor calling code

* Added get_alloc_size forwarding

* Cleaned up and improved type/error handling.

* fix: remove trailing whitespaces.

* Cleanup and use GGML error logging functions.

* Handle potentially dangerous edge cases.

* Apply suggestions from code review

Co-authored-by: Diego Devesa <slarengh@gmail.com>

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2025-01-14 10:38:01 +02:00
4334c71aed fix: Vulkan shader gen binary path (llama/11037) 2025-01-14 10:38:01 +02:00
e875a82473 ggml : allow loading backend with env variable (ggml/1059)
ref: #1058
2025-01-14 10:38:01 +02:00
507e230f1e scripts : sync opencl, gguf 2025-01-14 09:42:16 +02:00
eb68324c86 whisper : fix gpu device selection (#2728) 2025-01-13 13:11:37 +02:00
e940fbf283 server : fix build (#2718) 2025-01-13 08:57:33 +02:00
35d0e02c72 talk-llama : sync llama.cpp (#2709) 2025-01-13 08:55:48 +02:00
45d3faf961 server : generate unique tmp filenames (#2718)
#Summary

This Merge Request adds a mechanism to generate unique filenames for FFmpeg conversions in whisper_server.cpp. Previously, a single fixed filename was used (e.g., whisper-server-tmp.wav), which could result in unexpected file overwrites under certain circumstances. By generating a unique filename per request, any risk of overwriting temporary files is eliminated.

#Background / Motivation
	•	Problem: Relying on a static filename for temporary audio files may lead to overwrites if multiple operations occur simultaneously or if the same file name is reused.
	•	Goal: Dynamically generate unique filenames, ensuring each request or operation uses an isolated temporary file.
2025-01-13 08:55:21 +02:00
2ab2eb5110 whisper : add whisper_full_get_segment_no_speech_prob_from_state (#2716) 2025-01-09 16:21:07 +02:00
b82d305282 readme : add docker instructions (#2711)
I found the docker instructions to be useful in the README.md and the differences in docker variants such as ffmpeg and cuda support. However, this section was removed in v1.7.4 and I would vote to bring it back.

This is a pull request to add that section back.
2025-01-07 13:20:51 +02:00
885e31368d docs: Fix main -> whisper-cli in download scripts (#2707) 2025-01-06 15:17:57 +02:00
8a9ad7844d release : v1.7.4 2025-01-06 15:13:48 +02:00
eb874b3a3c ci : cont 2025-01-06 10:46:10 +02:00
eb78e3a3f1 ci : fix ubuntu runner names 2025-01-06 09:29:10 +02:00
ece3ff88f6 cli : fix segfault on missing argument (#2700) 2025-01-04 10:47:41 +02:00
9366544991 ci : fix arm builds 2025-01-04 10:45:01 +02:00
95583942ed sync : ggml
ggml-ci
2025-01-04 10:45:01 +02:00
2e93cb6a2f ggml : do not install metal source when embed library (ggml/1054) 2025-01-04 10:45:01 +02:00
de5cd60d1c metal : avoid uint (llama/11019) 2025-01-04 10:45:01 +02:00
3fcba3e58b ggml : fixes for AVXVNNI instruction set with MSVC and Clang (llama/11027)
* Fixes for clang AVX VNNI

* enable AVX VNNI and alder lake build for MSVC

* Apply suggestions from code review

---------

Co-authored-by: slaren <slarengh@gmail.com>
2025-01-04 10:45:01 +02:00
cea5f1c52f vulkan: optimize mul_mat for small values of N (llama/10991)
Make the mul_mat_vec shaders support N>1 (as a spec constant, NUM_COLS) where
the batch_strides are overloaded to hold the row strides. Put the loads from the
B matrix in the innermost loop because it should cache better.

Share some code for reducing the result values to memory in mul_mat_vec_base.
2025-01-04 10:45:01 +02:00
2112462db4 vulkan: im2col and matmul optimizations for stable diffusion (llama/10942)
* tests: Add im2col perf tests

* vulkan: optimize im2col, more elements per thread

* vulkan: increase small tile size for NV_coopmat2

* vulkan: change im2col to 512 elements per workgroup
2025-01-04 10:45:01 +02:00
fc84ecd445 vulkan: Use push constant offset to handle misaligned descriptors (llama/10987) 2025-01-04 10:45:01 +02:00
Eve
8de1e99907 vulkan: multi-row k quants (llama/10846)
* multi row k quant shaders!

* better row selection

* more row choices

* readjust row selection

* rm_kq=2 by default
2025-01-04 10:45:01 +02:00
499af9294a examples, ggml : fix GCC compiler warnings (llama/10983)
Warning types fixed (observed under MSYS2 GCC 14.2.0):
* format '%ld' expects argument of type 'long int', but argument has type 'size_t'
* llama.cpp/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp:81:46: warning: missing initializer for member '_STARTUPINFOA::lpDesktop' [-Wmissing-field-initializers]  (emitted for all struct field except first)
2025-01-04 10:45:01 +02:00
bcf937c216 ggml : more perfo with llamafile tinyblas on x86_64 (llama/10714)
* more perfo with llamafile tinyblas on x86_64.

- add bf16 suport
- change dispache strategie (thanks:
https://github.com/ikawrakow/ik_llama.cpp/pull/71 )
- reduce memory bandwidth

simple tinyblas dispache and more cache freindly

* tinyblas dynamic dispaching

* sgemm: add M blocs.

* - git 2.47 use short id of len 9.
- show-progress is not part of GNU Wget2

* remove not stable test
2025-01-04 10:45:01 +02:00
b8d90953d7 ggml : use wstring for backend search paths (llama/10960)
ggml-ci
2025-01-04 10:45:01 +02:00
60a422147b ggml : fix arm enabled features check (llama/10961) 2025-01-04 10:45:01 +02:00
3387415bad ggml : fix const usage in SSE path (llama/10962) 2025-01-04 10:45:01 +02:00
536ca3ec89 ggml : fix run-time on FreeBSD in get_executable_path() (llama/10948) 2025-01-04 10:45:01 +02:00
a4bb983190 vulkan: build fixes for 32b (llama/10927)
* vulkan: build fixes for 32b

Should fix #10923

* vulkan: initialize some buffer/offset variables
2025-01-04 10:45:01 +02:00
39c205f555 vulkan: optimize coopmat2 dequant functions (llama/10855)
Change the code to do 16b loads when possible and extract the appropriate
component late, so the code is effectively decoding a pair of elements and
then selecting one. This can allow more commoning to happen in the compiler
when neighboring elements are loaded.
2025-01-04 10:45:01 +02:00
6d502f33dc ggml-cpu: replace NEON asm with intrinsics in ggml_gemv_q4_0_4x8_q8_0() (llama/10874)
* ggml-cpu: replace NEON asm with intrinsics in ggml_gemv_q4_0_4x8_q8_0()

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* ggml-cpu: format code

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2025-01-04 10:45:01 +02:00
5ea27d089d SYCL: Migrate away from deprecated ggml_tensor->backend (llama/10840)
* Migrate to tensor->buffer for checking backend buffer type: 1

* SYCL: common.cpp try to migrate away from tensor->backend

* SYCL: fix assertions and add proper comments

* SYCL: remove extra space

* SYCL: Add back static to ggml_backend_buffer_is_sycl_split function

* SYCL: Add pragma directive to suppress warning spam

* SYCL: Integrate debug logs with GGML_LOG and other fixes

* Revert "SYCL: Integrate debug logs with GGML_LOG and other fixes"

This reverts commit 2607b7de0f0d2f4f1f690226f86fa861aa39cb97.
Let's keep the current SYCL specific logging mechanism for now

* SYCL: Use GGML_SYCL_DEBUG after reverting

* SYCL: reg_get_proc_address func, update to the current func signature

* SYCL: Refactor SYCL buffer checks in ggml_sycl_cpy_tensor_2d
2025-01-04 10:45:01 +02:00
1462d92588 ggml : add test for SVE and disable when it fails (llama/10906) 2025-01-04 10:45:01 +02:00
7ba1a41f47 ggml: fix arm build with gcc (llama/10895)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2025-01-04 10:45:01 +02:00
5ea088636f ggml : fix arm build (llama/10890)
* ggml: GGML_NATIVE uses -mcpu=native on ARM

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* ggml: Show detected features with GGML_NATIVE

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* remove msvc support, add GGML_CPU_ARM_ARCH option

* disable llamafile in android example

* march -> mcpu, skip adding feature macros

ggml-ci

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
Co-authored-by: Adrien Gallouët <angt@huggingface.co>
2025-01-04 10:45:01 +02:00
f32ddb3b1c tts : add OuteTTS support (llama/10784)
* server : add "tokens" output

ggml-ci

* server : output embeddings for all tokens when pooling = none

ggml-ci

* server : be explicit about the pooling type in the tests

ggml-ci

* server : do not normalize embeddings when there is no pooling

ggml-ci

* llama : add OuteTTS support (wip)

* wip

* extract features

* first conv

* group norm

* resnet conv

* resnet

* attn

* pos net

* layer norm

* convnext

* head

* hann window

* fix n_embd + remove llama.cpp hacks

* compute hann window

* fft

* spectrum processing

* clean-up

* tts : receive input text and generate codes

* clip : fix new conv name

* tts : minor fix

* tts : add header + minor fixes

ggml-ci

* tts : add matchematical constant

ggml-ci

* tts : fix sampling + cut initial noise

* tts : fixes

* tts : update default samplers

ggml-ci

* tts : text pre-processing

* tts : outetts-voc -> wavtokenizer-dec

* tts : remove hardcoded constants

ggml-ci

* tts : fix tensor shapes

* llama : refactor wavtokenizer tensors

ggml-ci

* cont

ggml-ci

* cont [no ci]

* llama : update WavTokenizer to non-causal attn

* llama : handle no-vocab detokenization

* tts : add Python example for OuteTTS (wip)

* tts : extend python example to generate spectrogram

ggml-ci

* server : fix rebase artifacts

* tts : enable "return_tokens" in Python example

ggml-ci

* tts : minor fixes

* common : support HF download for vocoder
2025-01-04 10:45:01 +02:00
79b75ece03 tests: add tests for GGUF (llama/10830) 2025-01-04 10:45:01 +02:00
6348d73e55 ggml : improve inputs log sched_print_assignments (ggml/1053)
This commit attempts to improve the log message for the inputs of the
splits in the sched_print_assignments function.

The motivation for this change is that currently even if there are no
inputs a colon is displayed at the end of the line, which can make it a
little confusing when reading the output as it could be interpreted as
the line below are inputs when they are in fact nodes. With this change
the colon will only be printed if there actually are inputs.
2025-01-04 10:45:01 +02:00
fb36a1538a readme : fix real-time audio input example build instructions (#2692) 2025-01-02 12:05:38 +02:00
c81b8b910b objc : rename ggml-cpu-aarch64.c to .cpp (#2687) 2025-01-02 12:05:09 +02:00
85b60f31d0 docs : replace Core ML with OpenVINO (#2686) 2025-01-02 12:03:02 +02:00
227b5ffa36 make : fix "main" -> "whisper-cli" 2024-12-31 11:46:17 +02:00
36a64a253f ci : re-enable Windows cublas build (#2676)
* Enable Windows cublas build

* Re-add v12 cuda
2024-12-31 11:11:42 +02:00
c84b83c370 ruby : Fix of C++ header guard name, model URI support, type signature and more (#2683)
* Add test to make Whisper::Context.new accept URI string

* Add test to make Whisper::Context.new accept URI

* Make Whisper::Context.new accept URI string and URI

* Update README

Revert "Fix argument of rb_undefine_finalizer"

* Fix typos

* Add type signature file

* Assign literarl to const variable

* Load Whisper::Model::URI from Init_whisper

* Simplify .gitignore

* Don't load whisper.so from whisper/model/uri.rb

* Use each_with_object instead of each

* Add Development section to README

* Rename header guard to conform to C++ naming convention
2024-12-30 14:26:35 +02:00
5136fd92c2 examples : handle "main.exe" deprecation 2024-12-30 13:00:18 +02:00
7d55637f0b cli : add --suppress_nst support (#2664) 2024-12-24 09:30:07 +02:00
0994506054 cli : add no_speech_thold (#2663) 2024-12-24 09:29:19 +02:00
53c9a3a984 cmake : remove hardcoded install rpath 2024-12-23 21:22:10 +02:00
ed09075ca0 server : fix help print 2024-12-22 15:32:05 +02:00
f07a81aa9f ruby : bug fix on callbacks and no_speech_prob (#2656)
* Don't generate documentation on test

* Move .startup to TestBase class

* Extract new_segment_callback as a function

* Extract progress_callback as a function

* Extract abort_callback as a function

* Extract register_callbacks as a function

* Call callbacks in Whiser::Context#full and #full_parallel

* Fix README

* Care about the cases content-size is nil and TTY is not available

* Add tests for no_speech_prob

* Add Whisper::Context#full_get_segment_no_speech_prob and Whisper::Segment#no_speech_prob
2024-12-21 21:52:06 +02:00
4183517076 server : add no-speech threshold parameter and functionality (#2654) 2024-12-21 17:00:08 +02:00
f4668169a0 whisper : rename suppress_non_speech_tokens to suppress_nst (#2653) 2024-12-21 12:54:35 +02:00
944ce49439 server : add option to suppress non-speech tokens (#2649)
* The parameter will suppress non-speech tokens like [LAUGH], [SIGH], etc. from the output when enabled.

* add to whisper_params_parse

* add missing param
2024-12-21 12:05:05 +02:00
2e59dced12 whisper : rename binaries + fix install (#2648)
* whisper : rename binaries + fix install

* cont : try to fix ci

* cont : fix emscripten builds
2024-12-21 09:43:49 +02:00
e4e05981d6 ruby : update gem version to v1.3.1 2024-12-20 11:53:27 +02:00
3de9deead5 release : v1.7.3 2024-12-18 18:12:40 +02:00
47f989f9b3 ci : msys enable SDL2 build (#2635) 2024-12-18 12:52:41 +02:00
acc4e13dee ruby : sync ggml (#2643) 2024-12-18 12:52:16 +02:00
ba6c2a8fd9 android : try to fix build 2024-12-18 12:52:16 +02:00
6576af00d7 files : remove old sources 2024-12-18 12:52:16 +02:00
8ac5db0169 sync : ggml 2024-12-18 12:52:16 +02:00
61edb117a0 talk-llama : sync llama.cpp 2024-12-18 12:52:16 +02:00
eb97b257eb sync : ggml 2024-12-18 12:52:16 +02:00
479499dc0e ggml : update ggml_backend_cpu_device_supports_op (llama/10867)
* ggml : fix cpy op for IQ-quants to use reference impl

ggml-ci

* ggml : disable tests involving i-matrix quantization

* ggml : update ggml_backend_cpu_device_supports_op

ggml-ci
2024-12-18 12:52:16 +02:00
Eve
d420a759c5 vulkan: bugfixes for small subgroup size systems + llvmpipe test (llama/10809)
* ensure mul mat shaders work on systems with subgroup size less than 32

more fixes

add test

* only s_warptile_mmq needs to be run with 32 threads or more
2024-12-18 12:52:16 +02:00
a1ab9b5e91 rwkv6: add wkv6 support for Vulkan backend (llama/10829)
* rwkv_wkv6 vulkan shader

* RWKV_WKV6 Vulkan op tests passed

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Apply code format changes

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* add [[unroll]] and remove unnecessary conditions

* add uma support

* fix erros in EditorConfig Checker

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Co-authored-by: Molly Sophia <mollysophia379@gmail.com>
2024-12-18 12:52:16 +02:00
e22d38e4f2 llama : add Qwen2VL support + multimodal RoPE (llama/10361)
* Barebone Qwen2VL LLM convertor

* Add Qwen2VL cli entrypoint

* [WIP] add qwen2vl arch

* Verify m-rope output

* Add vl-rope/2d-rope support for qwen2vl ViT

* update qwen2vl cli tool

* update 5D tensor op workaround

* [WIP] qwen2vl vision model

* make batch and clip utils compatible with qwen2vl

* [WIP] create inference workflow, gguf convert script but fix

* correcting vision-rope behavior, add the missing last layer back to ViT

* add arg parser to qwen2vl_surgery

* replace variable size array with vector

* cuda-gdb cmake preset

* add fp32 mrope, vision rope kernel

* add fp16 support for qwen2vl and m-rope

* add `GGML_ROPE_TYPE_MROPE`, `GGML_ROPE_TYPE_VISION`

* fix rope op mode switching, out dated func args

* update `llama_hparams`

* update to keep up stream changes

* resolve linter, test errors

* add makefile entry, update speical image padding token

* add mrope unit test, fix few compiler warnings

* rename `mrope` related function, params

* minor updates on debug util, bug fixs

* add `m-rope` testcase to `test-backend-ops`

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* fix traililng whitespce

* store `llama_hparams.rope_sections` with fixed size array

* update position id tensor size check in GGML_OP_ROPE

* minor updates

* update `ggml_backend_*_supports_op` of unsupported backends

* remote old `rope_section` compare operator

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-12-18 12:52:16 +02:00
856fbaa92f Introducing experimental OpenCL backend with support for Qualcomm Adreno GPUs (llama/10693)
* [cl][adreno] Add Adreno GPU support

Add new OpenCL backend to support Adreno GPUs

---------

Co-authored-by: Skyler Szot <quic_sszot@quicinc.com>
Co-authored-by: Shangqing Gu <quic_shawngu@quicinc.com>
Co-authored-by: Alexander Angus <quic_aangus@quicinc.com>
Co-authored-by: Hongqiang Wang <quic_wangh@quicinc.com>
Co-authored-by: Max Krasnyansky <quic_maxk@quicinc.com>

* [cl][ci] Add workflow for CL

* [cl][adreno] Fix memory leak for non SMALL_ALLOC path

* opencl: integrate backend dyn.load interface and fix compiler and format warnings

* opencl: remove small-alloc support and fix build errors for non-opencl platforms

* opencl: fixed merge conflict (MUSA added twice in cmake)

* opencl-ci: use RUNNER_TEMP instead of github.workspace

* opencl: fix embed tool invocation with python3

* opencl: CI workflow fixes

* opencl: Clean up small-alloc in CMake files

* opencl: cleanup ggml-opencl2 header file

* opencl: use ulong for offsets and strides in ADD kernel

* opencl: use cl_ulong for all offsets

* opencl: use cl_ulong for sizes and strides

* opencl: use `GGML_LOG_xxx` instead of `fprintf(stderr, ...)`

* opencl: rename backend `opencl2` -> `opencl`

* opencl: rename kernel files `ggml-opencl2` -> `ggml-opencl`

* opencl: make OpenCL required, remove redundant lib and inc directories

* `ggml-base`, `..` and `.` are added by `ggml_add_backend_library`

* opencl: rename backend - funcs, structs, etc `opencl2` -> `opencl`

* opencl: remove copyright marker since main license already covers

* opencl: replace some more OPENCL2 leftovers

* opencl: remove limits on `tensor_extra`

* opencl: use pools for `tensor_extra`

* opencl: fix compiler warnings with GCC and Clang

Still getting the warning about clCreateCmdQueue being obsolete.
Will fix that separately.

* opencl: fail gracefully if opencl devices are not available

Also for unsupported GPUs.

* opencl: fix MSVC builds (string length error)

* opencl: check for various requirements, allow deprecated API

* opencl: update log message for unsupported GPUs

---------

Co-authored-by: Skyler Szot <quic_sszot@quicinc.com>
Co-authored-by: Shangqing Gu <quic_shawngu@quicinc.com>
Co-authored-by: Alexander Angus <quic_aangus@quicinc.com>
Co-authored-by: Hongqiang Wang <quic_wangh@quicinc.com>
Co-authored-by: Max Krasnyansky <quic_maxk@quicinc.com>
2024-12-18 12:52:16 +02:00
2c05efa4b1 Fix crash caused by ggml_backend_load_all when launching on Android Activity (llama/10812)
* Fix crash caused by ggml_backend_load_all when launching on AndroidActivity.

Details:
Calling ggml_backend_load_all during initialization in the AndroidActivity project leads to a crash with the error:
terminating with uncaught exception of type std::__ndk1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied [./].
This issue occurs because AndroidActivity restricts file access due to sandboxing.

Reproduction:
In the example folder, the LlamaAndroid project can reproduce the crash by calling ggml_backend_load_all first in Java_android_llama_cpp_LLamaAndroid_backend_1init.

* Update ggml/src/ggml-backend-reg.cpp

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2024-12-18 12:52:16 +02:00
Eve
c21fb10b28 vulkan: small mul_mat_vec optimizations (llama/10665)
* double the number of rows per workgroup

* Update ggml-vulkan.cpp

* Vulkan: Add VK_EXT_subgroup_size_control support to ensure full subgroups for coopmats

* only increase the number of rows for amd and subgroup size 64

* fix missing NUM_ROWS for mul_mat_vec_iq4_nl_f16_f32, untested

* use subgroup min and max to check for gcn (requires https://github.com/ggerganov/llama.cpp/pull/10721)

* manual merge ggml-vulkan.cpp

* set min and max subgroup size in any case

* Also double the number of rows for Intel GPUs
2024-12-18 12:52:16 +02:00
26c9fd0cdc SYCL: Reduce most of the compiler warnings (llama/10748)
* Try to reduce some unused and typecast warnings

* Reduce compiler warnings step 2

* add a newline at the end of the file

* Initialize nreduce as size_t

* [SYCL] Remove pragma directives from mmq.cpp

* SYCL: mmq add condition to prevent blocks_per_tile_x_row variable from becoming 0

* SYCL softmax: Initialize nreduce as size_t

* ggml-sycl.cpp: fix some trailing whitespaces

* SYCL: remove the unused variables instead of commenting it out

* SYCL poo2d kernel: set NAN for invalid pooling op

* SYCL gemm.hpp: remove pragma directives

* SYCL gemm.hpp: use const cast to properly support dnnl::memory

* SYCL: wkv6 remove a comment

* SYCL: clean comments step 2

* SYCL: clean comments and variables step 3

* SYCL: Use GGML_UNUSED for unused variables

* SYCL: remove extra empty lines and a comment

* Remove TODO

* cleanup spaces

* add a stdout for unsupported op

* use sycl printf over fprintf

* remove prints for CI

* SYCL ggml-sycl: pool2D use sycl::nan and remove if-else block

---------

Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
2024-12-18 12:52:16 +02:00
e6eed605cf ggml : Fix compilation issues on ARM platform when building without fp16 (llama/10811) 2024-12-18 12:52:16 +02:00
abe3102cb7 CUDA: faster non-contiguous concat (llama/10760)
* faster uncontiguous concat

* Use a lambda to avoid code duplication

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* Update ggml/src/ggml-cuda/concat.cu

* add constexpr  and static assert

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2024-12-18 12:52:16 +02:00
1193e494a9 remove CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS (llama/10797)
other windows build fixes
2024-12-18 12:52:16 +02:00
e5e951672e Vulkan: Use improved q4_k and q5_k dequant code in dequant shaders (llama/10798) 2024-12-18 12:52:16 +02:00
0e24559ad9 Vulkan: Add VK_EXT_subgroup_size_control support to ensure full subgroups for coopmats (llama/10721)
* Vulkan: Add VK_EXT_subgroup_size_control support to ensure full subgroups for coopmats

* Fix subgroup size control extension support check

Add accf32 and accf16 checks for coopmats

* Also disable coopmats on amdvlk
2024-12-18 12:52:16 +02:00
527ac800cf ggml: load all backends from a user-provided search path (llama/10699)
* feat: load all backends from a user-provided search path

* fix: Windows search path

* refactor: rename `ggml_backend_load_all_in_search_path` to `ggml_backend_load_all_from_path`

* refactor: rename `search_path` to `dir_path`

* fix: change `NULL` to `nullptr`

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* fix: change `NULL` to `nullptr`

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2024-12-18 12:52:16 +02:00
479bd77169 vulkan: request round-to-even for fp16 in im2col/rope_head (llama/10767)
Vulkan doesn't mandate a specific rounding mode, but the shader_float_controls
feature allows rounding mode to be requested if the implementation supports it.
2024-12-18 12:52:16 +02:00
Eve
d8bf63a41b vulkan: dynamic subgroup size for the remaining k quants (llama/10745)
* q5_k

q4_k

q3_k

q2_k

q6_k multi row example

* revert as multi row isnt faster for k quants
2024-12-18 12:52:16 +02:00
b82c8d76dc CUDA: rename macros to avoid conflicts with WinAPI (llama/10736)
* Renames NVIDIA GPU-architecture flags to avoid name clashes with WinAPI. (e.g. CC_PASCAL, GPU architecture or WinAPI pascal compiler flag?)

* Reverts erroneous rename in SYCL-code.

* Renames GGML_CUDA_MIN_CC_DP4A to GGML_CUDA_CC_DP4A.

* Renames the rest of the compute capability macros for consistency.
2024-12-18 12:52:16 +02:00
86346f811e vulkan: disable spirv-opt for coopmat shaders (llama/10763)
There are some bugs in the 1.3.296 SDK, so disable this. It isn't strictly
necessary anyway.

Add missing dependency on vulkan-shaders-gen, so shaders get recompiled when it
changes.

Fix coopmat support reporting when glslc doesn't support NV_coopmat2.
2024-12-18 12:52:16 +02:00
c635f40a34 ggml : remove return from ggml_gallocr_allocate_node (ggml/1048)
This commit removes the return statement from ggml_gallocr_allocate_node
function.

The motivation behind this change is to make the code more readable and
consistent.
2024-12-18 12:52:16 +02:00
e0be0de1ee ggml : add check for grad_accs (ggml/1046)
* ggml : add check for grad_accs

This commit adds a check for grad_accs in ggml_graph_get_grad and
ggml_graph_get_grad_acc functions. This is necessary to avoid segfaults
when grad_accs is not initialized.

The motivation for this change is that I find it nice to be able to
print out a computation graph using ggml_graph_print but this function
segfaults when grad_accs is not initialized:
```console
(gdb) p g1
$2 = (ggml_cgraph *) 0x7ffff66004b0
(gdb) p *g1
$3 = {size = 2048, n_nodes = 1, n_leafs = 2, nodes = 0x7ffff6600500,
grads = 0x0, grad_accs = 0x0, leafs = 0x7ffff6604500,
visited_hash_set = {size = 4099, used = 0x7ffff6610518,
keys = 0x7ffff6608500}, order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT}
(gdb) p ggml_graph_print(g1)
=== GRAPH ===
n_nodes = 1

Program received signal SIGSEGV, Segmentation fault.
0x0000555555579775 in ggml_graph_get_grad
(cgraph=0x7ffff66004b0,node=0x7ffff6600340)
    at /ggml/ggml/src/ggml.c:5990
5990  return igrad != GGML_HASHSET_FULL &&
          ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ?
          cgraph->grads[igrad] : NULL;
```

* squash! ggml : add check for grad_accs

Fix the check in ggml_graph_get_grad. The check was incorrectly using
cgraph->grad_accs instead of cgraph->grads.
2024-12-18 12:52:16 +02:00
60dc6d003f common : remove old types
ggml-ci
2024-12-18 12:52:16 +02:00
eb27e0d834 CUDA: fix shared memory access condition for mmv (llama/10740) 2024-12-18 12:52:16 +02:00
a682fdce0c vulkan: fix compile warnings (llama/10731) 2024-12-18 12:52:16 +02:00
9ffbd3d969 Vulkan: fix NaN in tanh.comp with AMD proprietary driver on Windows (llama/10723)
* Vulkan: fix NaN in tanh.comp

* Faster NaN-free tanh
2024-12-18 12:52:16 +02:00
6585a890b4 vulkan: compile a test shader in cmake to check for coopmat2 support (llama/10713) 2024-12-18 12:52:16 +02:00
d0a050b51f ggml : disable iq4_nl interleave size 8 (llama/10709)
ggml-ci
2024-12-18 12:52:16 +02:00
e990d1b791 ggml : refactor online repacking (llama/10446)
* rename ggml-cpu-aarch64.c to .cpp

* reformat extra cpu backend.

- clean Q4_0_N_M and IQ4_0_N_M
  - remove from "file" tensor type
  - allow only with dynamic repack

- extract cpu extra bufts and convert to C++
  - hbm
  - "aarch64"

- more generic use of extra buffer
  - generalise extra_supports_op
  - new API for "cpu-accel":
     - amx
     - aarch64

* clang-format

* Clean Q4_0_N_M ref

Enable restrict on C++

* add op GGML_OP_MUL_MAT_ID for Q4_0_N_M with runtime repack

* added/corrected control on tensor size for Q4 repacking.

* Update ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* add debug logs on repacks.

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-12-18 12:52:16 +02:00
4a6d52efe6 Vulkan: VK_KHR_cooperative_matrix support to speed up prompt processing (llama/10597)
* Vulkan: Implement VK_KHR_cooperative_matrix support in the matrix matrix multiplication shader

* Improve performance with better q4_k and q5_k dequant and store unrolling

* Add Vulkan MUL_MAT and MUL_MAT_ID accumulator precision selection

* Rework mulmat shader selection and compilation logic, avoid compiling shaders that won't get used by device

* Vulkan: Implement accumulator switch for specific mul mat mat shaders

* Vulkan: Unroll more loops for more mul mat mat performance

* Vulkan: Add VK_AMD_shader_core_properties2 support to read Compute Unit count for split_k logic

* Disable coopmat support on AMD proprietary driver

* Remove redundant checks

* Add environment variable GGML_VK_DISABLE_COOPMAT to disable VK_KHR_cooperative_matrix support

* Fix rebase typo

* Fix coopmat2 MUL_MAT_ID pipeline selection
2024-12-18 12:52:16 +02:00
8b841d430a metal : Extend how Llama.cpp locates metal resources (llama/10676)
* metal : Extend how Llama.cpp locates metal resources (llama/10675)

  * It searches the resource file in the directory where the current
    binary is located as well.
  * Resolves symbolic links.

Rationale:

When we plug this dependency into a Bazel build and run it in the
context of Bazel (e.g. testing):

  * the execution directory is often very different from where the files
    are located and no direct control over this (Bazel sandboxing),
  * the Bazel sandbox often use symbolic links to make files available.

With this patch, we can have the resource file added to the target,
can build and run tests in the context of Bazel.

* Update ggml/src/ggml-metal/ggml-metal.m

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update ggml/src/ggml-metal/ggml-metal.m

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-12-18 12:52:16 +02:00
b74b68212a vulkan: Add VK_NV_cooperative_matrix2 support for mul_mat and flash attention (llama/10206) 2024-12-18 12:52:16 +02:00
3a27b2b91b ruby : Add no_speech_thold (#2641)
* Remove Whisper::Model.[]

* Fix Whisper::Model::URI#request

* Make Whisper::Context#initialize accept pre-converted model name

* Use downloading pre-converted model feature for testing

* Update README

* Remove unnecessary task

* Move whisper/model.rb -> whisper/model/uri.rb

* Update document comment of Whisper::Context#initialize

* Don't show download progress when not tty

* Pass String to raise

* Use cache model file if download fails

* Add test for auto download

* Specify required Ruby version

* Fix a typo

* Remove unnecessary flags

* Initialize Whisper::Params#diarize explicitely

* Remove redundant code from README for simplicity

* Add Whisper::Params#no_speech_thold attribute

* Add test for Whisper::Params#no_speech_thold
2024-12-18 11:00:50 +02:00
d34445e960 stream : improve consistency in README (#2642) 2024-12-18 08:43:48 +02:00
f897eb7670 whisper : support no_speech_thold (#2625)
* Implement no_speech_thold

no_speech_thold functionality is on par with OpenAI's whisper

* Addressed review comments
2024-12-17 19:15:47 +02:00
2f2841bfce whisper : add single-timestamp logic (#2629)
* Fix hallucinations during silence

When the predicted tokens end with a single timestamp the the entire 30 segment should be considered as done, to avoid hallucinations for the remaining part of segment.
This behaviour is on par with openai's whisper. Refer to logic related to `single_timestamp_ending` in https://github.com/openai/whisper/blob/main/whisper/transcribe.py

* Accept review comments related to formatting.

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-12-17 19:07:08 +02:00
09a1b61218 readme : fix typo (#2637) 2024-12-17 19:05:35 +02:00
94e7da1ff2 cmake : fix "amd64" processor string (#2638) 2024-12-17 18:34:32 +02:00
c4aed6831e vulkan : fix soft_max.comp division by zero (#2633)
This change prevents a division by zero error when p.KY is 0.
2024-12-16 12:34:38 +02:00
199579652e common : add cstdio header 2024-12-16 08:57:04 +02:00
d17e7139d8 stream : update build instructions 2024-12-15 21:55:36 +02:00
6a52eaea74 android : fix build and ci (#2624)
* Adding missing CMakeLists.txt include for ggm-cpu needed by whisper.android

* attempt to re-enable CI for JNI android

---------

Co-authored-by: Your Name <you@example.com>
2024-12-14 17:25:53 +02:00
6aa1d7b892 models : fix typo in download-ggml-model.sh (#2623)
Introduced in #2589
2024-12-12 18:02:00 +02:00
262e865a70 ruby : Sync whisper.cpp and model download feature (#2617)
* Use C++17

* Add test for Pathname of model

* Make Whisper::Context#initialize accept Pathname

* Add shorthand for pre-converted models

* Update documents

* Add headings to API section in README [skip ci]

* Remove unused function

* Don't care about no longer included file

* Cosmetic fix

* Use conditional get when get model files
2024-12-09 13:17:50 +02:00
ed733e85a1 scripts : update to new build system 2024-12-09 11:30:16 +02:00
5980b1ae77 devops : add cmake 2024-12-08 23:09:26 +02:00
0415a66044 devops : update make commands 2024-12-08 23:07:29 +02:00
7d134e3737 ggml : remove old files (skip) (#0) 2024-12-08 23:04:26 +02:00
9df53b357e ggml : sync remnants (skip) (#0) 2024-12-08 22:48:25 +02:00
b2115b4d9b scripts : remove amx from sync 2024-12-08 22:48:14 +02:00
0164427dd5 ci : disable freeBSD builds [no ci] 2024-12-08 20:14:35 +02:00
627b11c78a readme : update build instructions 2024-12-08 20:14:35 +02:00
472464453d ci : disable CUDA and Android builds 2024-12-08 20:14:35 +02:00
11dddfbc9e ci : disable Obj-C build + fixes 2024-12-08 20:14:35 +02:00
384e214cc7 make : shim cmake 2024-12-08 20:14:35 +02:00
f2c680f893 talk-llama : sync llama.cpp 2024-12-08 20:14:35 +02:00
fbe66da0e5 sync : ggml 2024-12-08 20:14:35 +02:00
a815940e0e ggml : add predefined list of CPU backend variants to build (llama/10626)
* ggml : add predefined list of CPU backend variants to build

* update CPU dockerfiles
2024-12-08 20:14:35 +02:00
904e307bce ggml-cpu : fix HWCAP2_I8MM value (llama/10646) 2024-12-08 20:14:35 +02:00
491ec076b4 vulkan: Implement "fast divide" (mul+shift) for unary ops like copy (llama/10642) 2024-12-08 20:14:35 +02:00
966433fdf2 SYCL : Move to compile time oneMKL interface backend selection for NVIDIA backend (llama/10584)
* [SYCL] Move to Compile Time backend selection on oneMKL Interface for NVIDIA backend

Move to compile time selection to backend to avoid latency at run time.
Add it to all mkl gemm calls and only for NVIDIA backend.

Signed-off-by: nscipione <nicolo.scipione@codeplay.com>

* Formatting

* Address PR comments to increase readibility

---------

Signed-off-by: nscipione <nicolo.scipione@codeplay.com>
2024-12-08 20:14:35 +02:00
6f1ba9d82d Avoid using __fp16 on ARM with old nvcc (llama/10616) 2024-12-08 20:14:35 +02:00
015ecd0001 vulkan: optimize and reenable split_k (llama/10637)
Use vector loads when possible in mul_mat_split_k_reduce. Use split_k
when there aren't enough workgroups to fill the shaders.
2024-12-08 20:14:35 +02:00
PAB
b7c64a4352 ggml: add GGML_SET Metal kernel + i32 CPU kernel (ggml/1037)
* implemented cpu kernel

* add i32 test cases in test-backend-ops

* typedef `ggml_metal_kargs_set`

* implemented `kernel_set`

* memcpy
2024-12-08 20:14:35 +02:00
PAB
7895d39508 ggml : add GGML_PAD_REFLECT_1D operation (ggml/1034)
* ggml_pad_reflect_1d defined in header

* implemented on CPU

* called the forward pass

* impl Metal kernel

* added Metal kernel

* added OP_PAD_REFLECT_1D in test-backend-ops.cpp

* add test-pad-reflect-1d test case

* test case support multiple backend
2024-12-08 20:14:35 +02:00
22616f00f9 files : remove make artifacts 2024-12-08 20:14:35 +02:00
02c6fcbc2c common : fix compile warning
ggml-ci
2024-12-08 20:14:35 +02:00
3daeacad24 ggml : move AMX to the CPU backend (llama/10570)
ggml : automatic selection of best CPU backend (llama/10606)
2024-12-08 20:14:35 +02:00
4d73962da4 metal : small-batch mat-mul kernels (llama/10581)
* metal : small-batch mat-mul kernels

ggml-ci

* metal : add rest of types

ggml-ci

* metal : final adjustments

ggml-ci

* metal : add comments

ggml-ci
2024-12-08 20:14:35 +02:00
068812650e SYCL: Fix and switch to GGML_LOG system instead of fprintf (llama/10579)
* Switched to GGML_LOG

* Fix missing semicolon
2024-12-08 20:14:35 +02:00
4b7e059e15 ggml-cpu: replace AArch64 NEON assembly with intrinsics in ggml_gemv_q4_0_4x4_q8_0() (llama/10567)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2024-12-08 20:14:35 +02:00
Eve
30e35d7271 vulkan: Dynamic subgroup size support for Q6_K mat_vec (llama/10536)
* subgroup 64 version with subgroup add. 15% faster

scalable version

tested for subgroup sizes 16-128

* check for subgroup multiple of 16 and greater than 16

* subgroup sizes are always a power of 2 (https://github.com/KhronosGroup/GLSL/issues/45)

* force 16 sequential threads per block

* make 16 subgroup size a constant
2024-12-08 20:14:35 +02:00
3623bd58f2 ggml : fix I8MM Q4_1 scaling factor conversion (llama/10562)
ggml-ci
2024-12-08 20:14:35 +02:00
cb847c20a7 ggml-cpu: fix typo in gemv/gemm iq4_nl_4_4 (llama/10580) 2024-12-08 20:14:35 +02:00
964b154a2a sycl : offload of get_rows set to 0 (llama/10432) 2024-12-08 20:14:35 +02:00
d7c2a04bce sycl : Reroute permuted mul_mats through oneMKL (llama/10408)
This PR fixes the failing MUL_MAT tests for the sycl backend.
2024-12-08 20:14:35 +02:00
2bb4ca9cba CANN: RoPE operator optimization (llama/10563)
* [cann] RoPE operator optimization

* [CANN]Code Formatting

---------

Co-authored-by: noemotiovon <noemotiovon@gmail.com>
2024-12-08 20:14:35 +02:00
a753a82462 vulkan: get the first command buffer submitted sooner (llama/10499)
This is an incremental improvement over #9118 to get work to the GPU a bit
sooner. The first part is to start with a smaller number of nodes before
the first submit, and ramp it up to the current 100 nodes/submit. The
second part is to reduce the dryrun overhead for all the nodes that just
need to request descriptor space.

With these changes I get around 1-2% speedup on RTX 4070 combined with my
old Haswell-era CPU.
2024-12-08 20:14:35 +02:00
276b08d8f0 ggml : remove redundant copyright notice + update authors 2024-12-08 20:14:35 +02:00
4ca1e72fe0 ggml : fix row condition for i8mm kernels (llama/10561)
ggml-ci
2024-12-08 20:14:35 +02:00
16a66f103f cmake : fix ARM feature detection (llama/10543)
ggml-ci
2024-12-08 20:14:35 +02:00
330273901f ggml-cpu: support IQ4_NL_4_4 by runtime repack (llama/10541)
* ggml-cpu: support IQ4_NL_4_4 by runtime repack

* ggml-cpu: add __ARM_FEATURE_DOTPROD guard
2024-12-08 20:14:35 +02:00
42099a9342 kompute : improve backend to pass test_backend_ops (llama/10542)
* kompute: op_unary: reject unsupported parameters

Signed-off-by: Sergio Lopez <slp@redhat.com>

* kompute: softmax: implement ALiBi support

Signed-off-by: Sergio Lopez <slp@redhat.com>

* kompute: rope: implement neox and phi3 support

Signed-off-by: Sergio Lopez <slp@redhat.com>

* kompute: op_mul_mat_q4_k permutted support

Signed-off-by: Sergio Lopez <slp@redhat.com>

* kompute: op_mul_mat_[q4_0|q4_1|q8_0] permutted support

Signed-off-by: Sergio Lopez <slp@redhat.com>

* kompute: op_mul_mat_f16 permutted support

Signed-off-by: Sergio Lopez <slp@redhat.com>

* kompute: op_mul_mat_q6_k permutted support

Signed-off-by: Sergio Lopez <slp@redhat.com>

---------

Signed-off-by: Sergio Lopez <slp@redhat.com>
2024-12-08 20:14:35 +02:00
90dd5fca9c CANN: Fix SOC_TYPE compile bug (llama/10519)
* CANN: Fix the bug build fail on Ascend310P under two cases:
1) Manual specify SOC_TYPE
2) Under some unusual compile environment

* Update the cann backend News content: Support F16 and F32 data type model for Ascend 310P NPU.

* fix CANN  compile fail bug: the assert in ascend kernel function doesn't supportted on some CANN version
2024-12-08 20:14:35 +02:00
2490f2a7f8 CANN: ROPE operator optimization (llama/10540)
* [cann] ROPE operator optimization

Co-authored-by: noemotiovon <noemotiovon@gmail.com>
2024-12-08 20:14:35 +02:00
230e985633 Add some minimal optimizations for CDNA (llama/10498)
* Add some minimal optimizations for CDNA

* ggml_cuda: set launch bounds also for GCN as it helps there too
2024-12-08 20:14:35 +02:00
ae24083f23 metal : fix group_norm support condition (llama/0) 2024-12-08 20:14:35 +02:00
6463e36369 vulkan: define all quant data structures in types.comp (llama/10440) 2024-12-08 20:14:35 +02:00
b3301f7d82 vulkan: Handle GPUs with less shared memory (llama/10468)
There have been reports of failure to compile on systems with <= 32KB
of shared memory (e.g. #10037). This change makes the large tile size
fall back to a smaller size if necessary, and makes mul_mat_id fall
back to CPU if there's only 16KB of shared memory.
2024-12-08 20:14:35 +02:00
ab5d4d93ec vulkan: further optimize q5_k mul_mat_vec (llama/10479) 2024-12-08 20:14:35 +02:00
2d6e9dd723 vulkan: skip integer div/mod in get_offsets for batch_idx==0 (llama/10506) 2024-12-08 20:14:35 +02:00
2f16e51553 vulkan: optimize Q2_K and Q3_K mul_mat_vec (llama/10459) 2024-12-08 20:14:35 +02:00
0f0994902f mtgpu: Add MUSA_DOCKER_ARCH in Dockerfiles && update cmake and make (llama/10516)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
2024-12-08 20:14:35 +02:00
5e1fcc1780 vulkan: fix group_norm (llama/10496)
Fix bad calculation of the end of the range. Add a backend test that
covers the bad case (taken from stable diffusion).

Fixes https://github.com/leejet/stable-diffusion.cpp/issues/439.
2024-12-08 20:14:35 +02:00
48f421de23 cmake : enable warnings in llama (llama/10474)
* cmake : enable warnings in llama

ggml-ci

* cmake : add llama_get_flags and respect LLAMA_FATAL_WARNINGS

* cmake : get_flags -> ggml_get_flags

* speculative-simple : fix warnings

* cmake : reuse ggml_get_flags

ggml-ci

* speculative-simple : fix compile warning

ggml-ci
2024-12-08 20:14:35 +02:00
e7afb2b991 ggml-cpu: cmake add arm64 cpu feature check for macos (llama/10487)
* ggml-cpu: cmake add arm64 cpu feature check for macos

* use vmmlaq_s32 for compile option i8mm check
2024-12-08 20:14:35 +02:00
9a5ef7b169 CANN: Improve the Inferencing Performance for Ascend NPU Device (llama/10454)
* improve inferencing performance for ascend npu.

Co-authored-by: Frank Mai <thxCode@thxcode0824@gmail.com>

* some modification after review

* some modifications after review

* restore some modifications

* restore some modifications

---------

Co-authored-by: shanshan shen <shanshanshen333@gmail.com>
Co-authored-by: Frank Mai <thxCode@thxcode0824@gmail.com>
2024-12-08 20:14:35 +02:00
453cc0fcf1 CANN: RoPE and CANCAT operator optimization (llama/10488)
Co-authored-by: noemotiovon <noemotiovon@gmail.com>
2024-12-08 20:14:35 +02:00
78dfec6bc5 vulkan: Fix a vulkan-shaders-gen arugment parsing error (llama/10484)
The vulkan-shaders-gen was not parsing the --no-clean argument correctly.
Because the previous code was parsing the arguments which have a value only
and the --no-clean argument does not have a value, it was not being parsed
correctly. This commit can now correctly parse arguments that don't have values.
2024-12-08 20:14:35 +02:00
f6d518fc4c metal : enable mat-vec kernels for bs <= 4 (llama/10491) 2024-12-08 20:14:35 +02:00
ac33379a35 llama : accept a list of devices to use to offload a model (llama/10497)
* llama : accept a list of devices to use to offload a model

* accept `--dev none` to completely disable offloading

* fix dev list with dl backends

* rename env parameter to LLAMA_ARG_DEVICE for consistency
2024-12-08 20:14:35 +02:00
77e3e4a090 ggml : add support for dynamic loading of backends (llama/10469)
* ggml : add support for dynamic loading of backends

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-12-08 20:14:35 +02:00
b840bb09be metal : minor code formatting 2024-12-08 20:14:35 +02:00
8b1c1c30a7 ggml : do not use ARM features not included in the build (llama/10457) 2024-12-08 20:14:35 +02:00
4b81335f75 CANN: Support Ascend310P to accelerate F32 and F16 Model (llama/10216)
* CANN Support Ascend310P to accelerate F32 and F16 Model

* Add compile option soc type macro ASCEND_310P to ggml-cann lib

* Remove unused code

* Remove the ascend soc_type hard code compile option in CMakelist.txt
2024-12-08 20:14:35 +02:00
2a4b5c9d7e cuda : optimize argmax (llama/10441)
* cuda : optimize argmax

* remove unused parameter

ggml-ci

* fixup : use full warps

ggml-ci

* Apply suggestions from code review

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* fix ub

* ggml : check ne00 <= INT32_MAX in argmax and argsort

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2024-12-08 20:14:35 +02:00
04662748aa vulkan: predicate max operation in soft_max shaders/soft_max (llama/10437)
Fixes #10434
2024-12-08 20:14:35 +02:00
a117279e13 vulkan: copy iq4_nl LUT into shared memory (llama/10409) 2024-12-08 20:14:35 +02:00
bbb292ed38 vulkan: further optimize mul_mat_vec using larger loads (llama/10387)
* vulkan: Use pipeline_robustness to disable robustness in mul_mat_vec.

Add some early returns for nonexistent rows in mul_mat_vec shaders. These
can only be hit when dispatching a 2D grid of workgroups. Fix the logic
for the 2D grid of workgroups to round up.

Enable the pipeline robustness extension if it's available, and use it to
disable robustness for these pipelines. The instructions to do the bounds
checking contend for the same ALU resources as the bit twiddling dequant
instructions.

* vulkan: Add GLSL structure aliases for quant types to allow larger loads

In Vulkan it's not possible to cast pointer types, so instead you have to
declare an aliased binding for the memory with a different type. This
commit adds aliases for the quant formats using 16b ints, and in a few
places where the struct size is a multiple of 4 also using 32b ints.
Currently only q4_k's aliases are used, but others will be used in
subsequent commits.

* vulkan: use larger loads in q5_k and q6_k shaders.

Similar to the optimization I did in q4_k recently, this vectorizes some loads
and reduces the number of bit twiddling instructions.

* vulkan: use larger K step per iteration in mul_mat_vec.

Add vec4 dequantization functions, and use them to do K=8 per iteration in
mul_mat_vec. This uses 16b loads for the quant values and 128b loads for B
which helps reduce the load on the memory system.

The K_PER_ITER==2 logic is still there, just for F16/F32, and really only
because they support unaligned sizes.

Tweak the num_iters/unrolling logic to be simpler and catch a couple missed
unrolling opportunities.
2024-12-08 20:14:35 +02:00
95e8901e71 add cmake rvv support (llama/10411) 2024-12-08 20:14:35 +02:00
4af9626702 CUDA: remove unnecessary warp reduce in FA (ggml/1032)
* kqmax_new_j in every thread within warp is same after operate at line 199,this reduce can be omit

* same problem in vec32

---------

Co-authored-by: ZhaoXiaoYu <zhao.xiaoyu@zte.com.cn>
2024-12-08 20:14:35 +02:00
PAB
c52d1035de feat: add GGML_UNARY_OP_ARGMAX Metal kernel (ggml/1019)
* implemented argmax kernel

* tpig -> tgpig

* change to strides

* contiguous assertions

* kernel working and tested

* argmax simd parallel implementation

* added 2 new tests for argmax in test-backend-ops

* cosmit

* added 3 tests cases for perf eval

* add test_argmax in make_test_cases_perf

* Update test-backend-ops.cpp

Co-authored-by: Diego Devesa <slarengh@gmail.com>

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2024-12-08 20:14:35 +02:00
PAB
5773a14980 metal : add GGML_OP_CONV_TRANSPOSE_1D kernels (ggml/1026)
* wip

* wip implementation f32

* kernel conv transpose 1d f32 working

* initial commit
2024-12-08 20:14:35 +02:00
6939147c47 Do not include arm_neon.h when compiling CUDA code (ggml/1028) 2024-12-08 20:14:35 +02:00
98f9916c9f ggml-opt: fix data corruption (ggml/1022) 2024-12-08 20:14:35 +02:00
021eef1000 ruby : Add low-level methods to transcribe (#2585)
* Add tests for Whisper::Context#full

* Add Whisper::Context#full

* Add tests for Whisper::Error

* Add document of Whisper::Context#full [skip ci]

* Add additional signature for Whisper::Context#full

* Add description to Whisper::Context#full

* Add test for Whisper::Context#full_parallel

* Add Whisper::Context#full_parallel

* Hide Whisper's instance methods from Ruby code

* Add class to test MemoryView

* Build test class before running test

* Add test for MemoryView

* Make Whisper::Context#full and #full_parallel accept MemoryView

* Use Ruby 3.1 on CI

* Add comment on samples data type

* Update README

* Update README

* Remove unused code
2024-11-28 10:33:07 +02:00
a9d06ce151 models : add q8_0 models to download-ggml-model.sh (#2589) 2024-11-28 10:31:54 +02:00
8c6a9b8bb6 ruby : Follow source tree change (#2580)
* Follow whisper.cpp source tree change

* Update whispercpp.gemspec

* Follow whisper.cpp log level change

* Fix paths in GitHub workflow for Ruby bindings

* Use GitHub workflow setting for dependency definition

* Use ternary operator
2024-11-21 17:04:29 +02:00
37c88027e1 whisper : use backend registry (#0) 2024-11-20 21:00:08 +02:00
9db070a3c5 ggml/sched : do not skip views in pre-assignments 2024-11-20 21:00:08 +02:00
7fd8d9c220 whisper : adapt to new ggml (wip) 2024-11-20 21:00:08 +02:00
06e059b8f8 talk-llama : sync llama.cpp 2024-11-20 21:00:08 +02:00
c9f49d5f9d sync : ggml 2024-11-20 21:00:08 +02:00
f4c1d7df39 ggml : sync resolve (skip) (#0) 2024-11-20 21:00:08 +02:00
339b8e559c Add required ggml-base and backend libs to cmake pkg (llama/10407) 2024-11-20 21:00:08 +02:00
5f6d6919b4 cuda : fix CUDA_FLAGS not being applied (llama/10403) 2024-11-20 21:00:08 +02:00
8ee767732f sycl : Add option to set the SYCL architecture for all targets (llama/10266)
* Add option to set the SYCL architecture for all targets
* Convert GGML_SYCL_HIP_TARGET to the more generic GGML_SYCL_ARCH option
* Document that setting GGML_SYCL_ARCH can improve the performance
2024-11-20 21:00:08 +02:00
45f1f9144f vulkan: Optimize soft_max (llama/10301)
* vulkan: Optimize soft_max

Large soft_max could already saturate memory, but small/medium sizes were
pretty slow. The bulk of the gains for them comes from using a smaller
workgroup size, and making the workgroup size match the subgroup size also
makes the barriers much cheaper.

Cache some values in locals to avoid refetching/recomputing. And stamp
out a few "template instantiations" so smaller cases will fully unroll.

Add a missing early return for OOB rows. This happens when there are more
than 512 rows and the dispatch is 512 x H.

* vulkan: Further soft_max optimizations

Restore the workgroup size of 512 case, use it for >1024.

Use unrollable loops for more iteration counts.
2024-11-20 21:00:08 +02:00
53589c8f12 sycl: Revert MUL_MAT_OP support changes (llama/10385) 2024-11-20 21:00:08 +02:00
7ac2f17fac cuda : only use native when supported by cmake (llama/10389) 2024-11-20 21:00:08 +02:00
48862c7b27 vulkan: remove use of null initializer (llama/10372)
Seems like this isn't working for vulkan-over-metal when the array is sized
by a spec constant. Maybe a spirv-cross limitation?
2024-11-20 21:00:08 +02:00
44f7d9f4e3 metal : fox offset integer overflows in im2col (ggml/1015)
-- While running StableDiffusion.cpp locally with Metal some offsets overflow and results in incorrect calculations
2024-11-20 21:00:08 +02:00
fd12302587 Vulkan: Fix device info output format specifiers (llama/10366)
* Vulkan: Fix device info output format specifiers

* Vulkan: Use zu printf specifier for size_t instead of ld
2024-11-20 21:00:08 +02:00
PAB
f80bef4630 metal : add GGML_UNARY_OP_ELU kernel (ggml/1018) 2024-11-20 21:00:08 +02:00
161b443514 CUDA: fix MMV kernel being used for FP16 src1 (llama/10357) 2024-11-20 21:00:08 +02:00
ef7fbe1c66 CMake: fix typo in comment [no ci] (llama/10360) 2024-11-20 21:00:08 +02:00
0879d3599e llama : only use default buffer types for the KV cache (llama/10358) 2024-11-20 21:00:08 +02:00
2a444dc5bd metal : refactor kernel args into structs (llama/10238)
* metal : add kernel arg structs (wip)

* metal : fattn args

ggml-ci

* metal : cont + avoid potential int overflow [no ci]

* metal : mul mat struct (wip)

* cont : mul mat vec

* cont : pass by reference

* cont : args is first argument

* cont : use char ptr

* cont : shmem style

* cont : thread counters style

* cont : mul mm id

ggml-ci

* cont : int safety + register optimizations

ggml-ci

* metal : GGML_OP_CONCAT

ggml-ci

* metal : GGML_OP_ADD, GGML_OP_SUB, GGML_OP_MUL, GGML_OP_DIV

* metal : GGML_OP_REPEAT

* metal : GGML_OP_CPY

* metal : GGML_OP_RMS_NORM

* metal : GGML_OP_NORM

* metal : add TODOs for rest of ops

* ggml : add ggml-metal-impl.h

ggml-ci
2024-11-20 21:00:08 +02:00
45cf1634dc ggml : fix undefined reference to 'getcpu' (llama/10354)
https://github.com/ggerganov/llama.cpp/issues/10352
2024-11-20 21:00:08 +02:00
dcb2922d1d CUDA: remove DMMV, consolidate F16 mult mat vec (llama/10318) 2024-11-20 21:00:08 +02:00
3c5c751174 CMake: default to -arch=native for CUDA build (llama/10320) 2024-11-20 21:00:08 +02:00
24ad19d0e9 ggml : fix possible buffer use after free in sched reserve (llama/9930) 2024-11-20 21:00:08 +02:00
bd574b05af ggml : inttypes.h -> cinttypes (llama/0)
ggml-ci
2024-11-20 21:00:08 +02:00
7e0eafcb1e ggml : adapt AMX to tensor->grad removal (llama/0)
ggml-ci
2024-11-20 21:00:08 +02:00
75670ae673 ggml : fix compile warnings (llama/0)
ggml-ci
2024-11-20 21:00:08 +02:00
d4fcdf602b llamafile : fix include path (llama/0)
ggml-ci
2024-11-20 21:00:08 +02:00
1bebb1a116 vulkan: Optimize some mat-vec mul quant shaders (llama/10296)
Compute two result elements per workgroup (for Q{4,5}_{0,1}). This reuses
the B loads across the rows and also reuses some addressing calculations.
This required manually partially unrolling the loop, since the compiler
is less willing to unroll outer loops.

Add bounds-checking on the last iteration of the loop. I think this was at
least partly broken before.

Optimize the Q4_K shader to vectorize most loads and reduce the number of
bit twiddling instructions.
2024-11-20 21:00:08 +02:00
ee437cde59 ggml : optimize Q4_0 into Q4_0_X_Y repack (llama/10324) 2024-11-20 21:00:08 +02:00
c1506d38cf Make updates to fix issues with clang-cl builds while using AVX512 flags (llama/10314) 2024-11-20 21:00:08 +02:00
c9541741e6 ggml: new optimization interface (ggml/988)
* ggml: new optimization interface

remove test2.c, test3.c

store adamw params in tensor

move grads from tensor to graph

* avoid segfault upon API misuse

* add ggml-opt.h to public headers

* remove dependence of ggml-opt.cpp on ggml-cpu.h
2024-11-20 21:00:08 +02:00
6a55015dc4 ggml : remove duplicated sources from the last sync (ggml/1017)
* ggml : remove duplicated sources from the last sync

ggml-ci

* cont : remove FindSIMD.cmake [no ci]
2024-11-20 21:00:08 +02:00
7e86030d4d ggml : fix some build issues 2024-11-20 21:00:08 +02:00
401fbea326 sync : leftovers (ggml/0)
ggml-ci
2024-11-20 21:00:08 +02:00
44d1cbdfe9 cmake : restore CMakeLists.txt (llama/10256)
ggml-ci
2024-11-20 21:00:08 +02:00
Eve
3216efef2e AVX BF16 and single scale quant optimizations (llama/10212)
* use 128 bit loads (i've tried 256->128 to death and its slower)

* double accumulator

* avx bf16 vec dot

* +3% q4_0 inference

* +7% tg +5% pp compared to master

* slower f16c version, kep for reference

* 256b version, also slow. i tried :)

* revert f16

* faster with madd

* split to functions

* Q8_0 and IQ4_NL, 5-7% faster

* fix potential overflow (performance reduced)

* 16 bit add for q4_0 only

* merge
2024-11-20 21:00:08 +02:00
2c0484ebf7 sycl: Use syclcompat::dp4a (llama/10267)
* sycl: Use syclcompat::dp4a

* Using the syclcompat version allow the compiler to optimize the
  operation with native function

* Update news section

* Update CI Windows oneAPI version to 2025.0

* Reword doc

* Call syclcompat::dp4a inside dpct::dp4a

This reverts commit 90cb61d692d61360b46954a1c7f780bd2e569b73.
2024-11-20 21:00:08 +02:00
3298916e5e backend cpu: add online flow for aarch64 Q4_0 GEMV/GEMM kernels (llama/9921)
* backend-cpu: add online flow for aarch64 Q4_0 GEMV/GEMM kernels

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2024-11-20 21:00:08 +02:00
746bf2596f ggml : build backends as libraries (llama/10256)
* ggml : build backends as libraries

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com>
2024-11-20 21:00:08 +02:00
5f7e094ccb scripts : update sync 2024-11-20 21:00:08 +02:00
6266a9f9e5 release : v1.7.2 2024-11-19 18:54:22 +02:00
d24f981fb2 sycl: fix example build (#2570) 2024-11-18 14:57:23 +02:00
01d3bd7d5c ci : use local ggml in Android build (#2567) 2024-11-16 20:45:41 +02:00
bb12cd9b77 ggml : tmp workaround for whisper.cpp (skip) (#2565) 2024-11-16 20:21:24 +02:00
f02b40bcb4 update : readme 2024-11-15 16:00:10 +02:00
83ac2842bd scripts : fix sync path 2024-11-15 15:24:09 +02:00
c4e95fb74d whisper.swiftui : switch Mac dest to Mac (Designed for iPad) (#2562) 2024-11-15 15:21:53 +02:00
e23721f3fb cmake : fix ppc64 check (#0) 2024-11-15 15:21:04 +02:00
c0a9f8ef85 whisper : include ggml-cpu.h (#0) 2024-11-15 15:21:04 +02:00
6477b84eb6 build : fixes 2024-11-15 15:21:04 +02:00
24d706774d talk-llama : sync llama.cpp 2024-11-15 15:21:04 +02:00
5089ab2d6a whisper : fix build (#0) 2024-11-15 15:21:04 +02:00
bdbb906817 sync : ggml 2024-11-15 15:21:04 +02:00
fa2ebd336e sycl : Fixes to broken builds and test-backend-ops (llama/10257)
* Fixes broken build for the SYCL CUDA backend caused by non-explicit gemm call in outprod (merged in with RWKV6 in
Optimize RWKV6 Operator Naming and Implement Multi-core CPU/ SYCL Acceleration #10133)

* Marks permuted MUL_MAT as unsupported to be able to run test-backend-ops

* Fixes asserts in norm to fix debug builds.
2024-11-15 15:21:04 +02:00
21b01a21b6 vulkan: Optimize contiguous copies (llama/10254)
* tests: Fix memory bandwidth calculation for perf tests

Add a flops calculation for flash attention.

Add one GGML_OP_CPY perf test.

* vulkan: Optimize contiguous copies

Add a variant of the copy shader for when the tensors are contiguous. Avoid
the complex addressing calculations, and do four elements per invocation
to hide some other overhead.

Apply similar changes to the scale shader, since scale is always contiguous.

Add a "progress bar" for shader compiles.
2024-11-15 15:21:04 +02:00
b54ce5edc5 vulkan: Throttle the number of shader compiles during the build step. (llama/10222)
Fixes #9582

Spawning too many concurrent copies of glslc leads to "Failed to create pipes"
errors on Linux. This change applies the same throttling we use for
multithreaded pipeline creation.
2024-11-15 15:21:04 +02:00
26a31b78e9 metal : more precise Q*K in FA vec kernel (llama/10247) 2024-11-15 15:21:04 +02:00
14d13c5f9f vulkan: Fix newly added tests for permuted mul_mat and 1D im2col (llama/10226) 2024-11-15 15:21:04 +02:00
5e110c2eb5 metal : reorder write loop in mul mat kernel + style (llama/10231)
* metal : reorder write loop

* metal : int -> short, style

ggml-ci
2024-11-15 15:21:04 +02:00
4a9926d521 metal : fix build and some more comments (llama/10229) 2024-11-15 15:21:04 +02:00
ae3c5642d0 metal : fix F32 accumulation in FA vec kernel (llama/10232) 2024-11-15 15:21:04 +02:00
e287a3b627 metal : hide debug messages from normal log 2024-11-15 15:21:04 +02:00
SXX
b890243690 ggml: fix zero division in ‘dne’ calculation in CUDA COUNT_EQUAL operator when ‘ne’ is small (#10213) 2024-11-15 15:21:04 +02:00
b7b38f7d68 ggml : optimize llamafile cpu matrix multiplication for ppc64le (llama/10156)
This change upstreams llamafile's cpu matrix
multiplication kernels for ppc64le using MMA
builtins for FP32 datatype.

This change results in a consistent 90%
improvement in input processing time, and 20%
to 80% improvement in output processing time,
across various batch sizes.

The patch is tested with Meta-Lllama-3-8B,
Mistral-7B, Llama-2-7B-chat-hf models on a
IBM POWER10 machine.

Signed-off-by: Amrita H S <amritahs@linux.vnet.ibm.com>
2024-11-15 15:21:04 +02:00
9f67aab211 metal : opt-in compile flag for BF16 (llama/10218)
* metal : opt-in compile flag for BF16

ggml-ci

* ci : use BF16

ggml-ci

* swift : switch back to v12

* metal : has_float -> use_float

ggml-ci

* metal : fix BF16 check in MSL

ggml-ci
2024-11-15 15:21:04 +02:00
8f0f785d88 metal : improve clarity (minor) (llama/10171) 2024-11-15 15:21:04 +02:00
d0b8335789 metal : optimize FA kernels (llama/10171)
* ggml : add ggml_flash_attn_ext_get_prec

* metal : use F16 precision in FA kernels

ggml-ci

* metal : minor clean-up

* metal : compile-guard bf16 FA kernels

ggml-ci

* build : remove obsolete compile flag [no ci]

* metal : prevent int overflows [no ci]

* cuda : disable BF16 FA

ggml-ci

* metal : fix BF16 requirement for FA kernels

ggml-ci

* make : clean-up [no ci]
2024-11-15 15:21:04 +02:00
1550be79f1 ggml : add ggml-cpu.h to the public headers (llama/10204) 2024-11-15 15:21:04 +02:00
807f848c2f fix q4_0_8_8 format for corrupted tokens issue (llama/10198)
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-62-167.us-west-2.compute.internal>
2024-11-15 15:21:04 +02:00
42398f13b0 Optimize RWKV6 Operator Naming and Implement Multi-core CPU/ SYCL Acceleration (llama/10133)
* rwkv6: rename to wkv6

* rwkv6: support avx2 avx512 armv8 armv9

* rwkv6: update cuda file name

* rwkv6: rename params

* wkv on sycl

* sycl: add some ops

* sycl: Enhance OP support judgment

* wkv6: drop armv9 and tranfer to GGML style

ggml-ci

* sync : ggml

* update the function to use appropriate types

* fix define error

* Update ggml/src/ggml-cpu.c

* add appropriate asserts

* move element-wise functions outside

* put the declaration outside the loop

* rewrite to be more inline with the common pattern for distributing threads

* use recommended way GGML_TENSOR_LOCALS

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Diego Devesa <slarengh@gmail.com>
Co-authored-by: Plamen Minev <pacominev@gmail.com>
Co-authored-by: Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
Co-authored-by: Meng, Hengyu <airdldl@163.com>
2024-11-15 15:21:04 +02:00
31c3482a4e metal : add BF16 support (llama/8439)
* ggml : add initial BF16 support

ggml-ci

* metal : add mul_mat_id BF16 support

ggml-ci

* metal : check for bfloat support on the Metal device

ggml-ci

* metal : better var names [no ci]

* metal : do not build bfloat kernels when not supported

ggml-ci

* metal : try to fix BF16 support check

ggml-ci

* metal : this should correctly check bfloat support
2024-11-15 15:21:04 +02:00
50257af686 metal : fix from ptr buffer name (llama/10189) 2024-11-15 15:21:04 +02:00
d111a0987e ggml : adjust is_first_call init value (llama/10193)
ggml-ci
2024-11-15 15:21:04 +02:00
915bcd2c63 metal : add quantized FA support (llama/10149)
* metal : add quantized FA (vec) support

ggml-ci

* metal : add quantized FA (non-vec) support

* metal : fix support check

ggml-ci

* metal : clean-up

* metal : clean-up (cont)

* metal : fix shared memory calc + reduce smem + comments

* metal : float-correctness

* metal : minor [no ci]
2024-11-15 15:21:04 +02:00
f69c8b6f1b ggml : fix arch check in bf16_to_fp32 (llama/10164) 2024-11-15 15:21:04 +02:00
Eve
8c9044bef0 Q6_K AVX improvements (llama/10118)
* q6_k instruction reordering attempt

* better subtract method

* should be theoretically faster

small improvement with shuffle lut, likely because all loads are already done at that stage

* optimize bit fiddling

* handle -32 offset separately. bsums exists for a reason!

* use shift

* Update ggml-quants.c

* have to update ci macos version to 13 as 12 doesnt work now. 13 is still x86
2024-11-15 15:21:04 +02:00
5f8e928194 ggml : fix gelu tables initialization (llama/10172) 2024-11-15 15:21:04 +02:00
25da30bd60 ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (llama/10167) 2024-11-15 15:21:04 +02:00
542734100e fix build break on arm64 linux (llama/10166)
This fixes the build break from the recent changes
to move the CPU backend to separate files
https://github.com/ggerganov/llama.cpp/pull/10144
2024-11-15 15:21:04 +02:00
b06b4c0c08 cuda : clear error after changing peer access (llama/10153) 2024-11-15 15:21:04 +02:00
939d36fb4c metal : simplify f16 and f32 dequant kernels (llama/0) 2024-11-15 15:21:04 +02:00
1471e41180 metal : move dequantize templates to beginning of MSL source (llama/0) 2024-11-15 15:21:04 +02:00
35949192e9 CANN: adjust backend registry refactor. (llama/10158)
remove buffer->iface.get_name that used in cann as it was removed in backend registry refactor PR.
2024-11-15 15:21:04 +02:00
9c817edb48 ggml : move CPU backend to a separate file (llama/10144) 2024-11-15 15:21:04 +02:00
24a0feb5d9 metal : minor fixup in FA kernel (llama/10143)
* metal : minor fixup in FA kernel

ggml-ci

* metal : use the unrolled loop variable

* metal : remove unused var
2024-11-15 15:21:04 +02:00
2ab8cce7e3 llama : add simple-chat example (llama/10124)
* llama : add simple-chat example

---------

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
2024-11-15 15:21:04 +02:00
b40c255e98 llama : use smart pointers for ggml resources (llama/10117) 2024-11-15 15:21:04 +02:00
ec3e16445e vulkan : improve ggml_vk_create_buffer error handling (llama/9898) 2024-11-15 15:21:04 +02:00
0665168ef3 ggml : remove ggml_scratch (llama/10121)
ggml-ci
2024-11-15 15:21:04 +02:00
5f6b992eea build: fix build error in Windows env with OneAPI setup (llama/10107) 2024-11-15 15:21:04 +02:00
3e231ab9cc llama : fix buffer checks for mamba and rwk (llama/10111)
* llama : fix buffer checks for mamba and rwk

* llama : fix missing worst case flag during reserve

* cuda : fix supports_op for norm

* disable sched SET_CAUSE
2024-11-15 15:21:04 +02:00
371bfaca8c ggml : check tensor name lengths in gguf files (llama/10100) 2024-11-15 15:21:04 +02:00
91e30a3a23 kompute: add mul_mat_q4_k shader (llama/10097)
This is a more or less direct translation from the Metal implementation
to GLSL.

Signed-off-by: Sergio Lopez <slp@redhat.com>
2024-11-15 15:21:04 +02:00
1e122d66f9 kompute: add backend registry / device interfaces (llama/10045)
Get in line with the other backends by supporting the newer
backend/device registry interfaces.

Signed-off-by: Sergio Lopez <slp@redhat.com>
2024-11-15 15:21:04 +02:00
63a4e09a0f ggml : fix memory leaks when loading invalid gguf files (llama/10094)
* ggml : fix gguf string leak when reading kv pairs fails

* ggml : avoid crashing with GGML_ABORT when the KV has an invalid type

* ggml : avoid crashing on failed memory allocations when loading a gguf file
2024-11-15 15:21:04 +02:00
75dd198870 ggml : add Q4_0_8_8 RISC-V GEMV and GEMM kernels (llama/10029)
* ggml : RISC-V vector gemv for q4_0_8x8

* ggml : Added WIP rvv q4_0_8x8 gemm

* ggml : Added initial implementation of rvv gemm

* ggml : optimize gemm to avoid register spillover

* ggml : Fix GCC rvv load alignment issue

* ggml : Format gemm rvv code

* ggml : Fix a typo in RVV q4_0_8_8 GEMM
2024-11-15 15:21:04 +02:00
1d48457aa6 llama : refactor model loader with backend registry (llama/10026) 2024-11-15 15:21:04 +02:00
307712a903 ggml: Add POOL2D OP for GPU acceleration to the Vulkan backend in the MobileVLM model. (llama/9763)
* ggml: Add POOL2D OP for GPU ACC to the Vulkan.

- The MobileVLM model now supports inference acceleration through GPU by utilizing the Vulkan backend.
- A GGML_OP_POOL_2D shader has been added. (Pooling)
- The encoding performance of the CLIP model improved from 2.8s on the CPU to 0.7s on the GPU.

Signed-off-by: Changyeon Kim <cyzero.kim@samsung.com>

* [fix] Correct the incorrect order of the parameters.

fix casting to int.

Signed-off-by: Changyeon Kim <cyzero.kim@samsung.com>

---------

Signed-off-by: Changyeon Kim <cyzero.kim@samsung.com>
2024-11-15 15:21:04 +02:00
fbc9a05ddf musa: workaround for Guilty Lockup in cleaning src0 (llama/10042)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
2024-11-15 15:21:04 +02:00
28496ac55e cmake : make it possible linking ggml as external lib (ggml/1003) 2024-11-15 15:21:04 +02:00
b1c06c09b0 metal : fix minor string leaks (ggml/1004) 2024-11-15 15:21:04 +02:00
498ac0dc27 scripts : update sync 2024-11-15 15:21:04 +02:00
03af461de8 ci : fix building workflow for linux/arm64 container (#2555) 2024-11-15 11:07:17 +02:00
f19463ece2 ruby : extend API (#2551)
* Handle objs in Ruby code

* Add task to make Makefile

* Share commont constance in test suites

* Add model-related APIs

* Add Whisper::Model class

* Add tests for Whisper::Model

* Add missing LDFLAG -lstdc++

* Add tests for Whisper.log_set

* Add Whisper.set_log

* Define log level

* Add document on logging

* Add license section to README

* Add document on Whisper::Model

* Fix examples in README

* Add test for Model with GC

* Make dependency on Makefile more accurate

* Fix bug about Whisper::Model and GC
2024-11-13 21:52:56 +02:00
5f8a086e22 whisper.swiftui : add model download list & bench methods (#2546)
* swift : fix resources & exclude build

* whisper : impl whisper_timings struct & api

* whisper.swiftui : model list & bench methods

* whisper : return ptr for whisper_get_timings

* revert unnecessary change

* whisper : avoid designated initializer

* whisper.swiftui: code style changes

* whisper.swiftui : get device name / os from UIDevice

* whisper.swiftui : fix UIDevice usage

* whisper.swiftui : add memcpy and ggml_mul_mat (commented)
2024-11-13 21:51:34 +02:00
a28d82e373 ruby : fix the instructions (#2548)
#prompt doesn't exist but #initial_prompt does
2024-11-13 21:47:42 +02:00
5ccca19f0c ggml : vulkan logs (#2547) 2024-11-13 21:47:15 +02:00
300c07b94d examples : fix ffmpeg v5 build (#2543)
remove call to 'av_register_all()' which does not exist in ffmpeg v5
anymore.
2024-11-13 21:41:52 +02:00
31aea563a8 whisper : fix extra memory usage (#2534)
* passing samples_padded by ref to the threads.

* passing samples_padded by ref to the threads.

---------

Co-authored-by: Vinith Misra <physicsdemon@gmail.com>
2024-11-06 23:02:11 +02:00
0377596b77 whisper : backend registry init before model load 2024-11-01 10:19:05 +02:00
c65d0fd3c8 talk-llama : sync llama.cpp 2024-11-01 10:19:05 +02:00
d9efb664ac sync : ggml 2024-11-01 10:19:05 +02:00
b5b4b0f5de ggml : add AMX backend (llama/8998) 2024-11-01 10:19:05 +02:00
ab36d02560 metal : support permuted matrix multiplicaions (llama/10033)
* metal : support permuted matrix multiplicaions

ggml-ci

* cont : use nb01 directly for row steps

ggml-ci

* cont : add comments [no ci]

* metal : minor refactor

* metal : minor
2024-11-01 10:19:05 +02:00
6e67749c00 CUDA: fix insufficient buffer clearing for MMQ (llama/10032) 2024-11-01 10:19:05 +02:00
ab0385f43b CUDA: fix MMQ for non-contiguous src0, add tests (llama/10021)
* CUDA: fix MMQ for non-contiguous src0, add tests

* revise test code
2024-11-01 10:19:05 +02:00
10eb603a3c increase cuda_cpy block size (ggml/996)
Co-authored-by: bssrdf <bssrdf@gmail.com>
2024-11-01 10:19:05 +02:00
a3231b2f2e metal : add POOL2D and fix IM2COL (llama/9943)
* add pool_2d

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* fix im2col and add unittest for N>=1024

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* add tests for N % 1024 != 0

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* remove trailing whitespaces

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* apply suggestions

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* apply more optimization

- original IM2COL kernel + _ext with MIN()

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* apply review: change kernel name of pool_2d

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* apply review

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

* fix more formatting and enhance readability

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>

---------

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>
2024-11-01 10:19:05 +02:00
13db492f83 Adapt to dynamically loadable backends mechanism (llama/9970)
* [CANN] Adapt to dynamically loadable backends mechanism

* Fix the Bug: inference running result is garbled in debug running model for LM models who's type is Q4_0 class

* Handle the review comments of this pull request
2024-11-01 10:19:05 +02:00
741c138aa1 ggml : add asserts for type conversion in fattn kernels (llama/9971)
ggml-ci
2024-11-01 10:19:05 +02:00
25f9fee6fb rpc : pack only RPC structs (llama/9959) 2024-11-01 10:19:05 +02:00
7c1570bee6 fix mul_mat_vec_q and *_vec_q error (llama/9939)
Co-authored-by: arthw <14088817+arthw@users.noreply.github.com>
2024-11-01 10:19:05 +02:00
4078e4c388 rpc : backend refactoring (llama/9912)
* rpc : refactor backend

Use structs for RPC request/response messages

* rpc : refactor server
2024-11-01 10:19:05 +02:00
a4a22daa8f Add SYCL Backend registry, device and Event Interfaces (llama/9705)
* implemented missing SYCL event APIs

* sycl : Added device and backend reg interfaces

* Restructured ggml-sycl.cpp
2024-11-01 10:19:05 +02:00
e1936eb2a5 add amx kernel for gemm (llama/8998)
add intel amx isa detection

add vnni kernel for gemv cases

add vnni and amx kernel support for block_q8_0

code cleanup

fix packing B issue

enable openmp

fine tune amx kernel

switch to aten parallel pattern

add error message for nested parallelism

code cleanup

add f16 support in ggml-amx

add amx kernels for QK_K quant formats: Q4_K, Q5_K, Q6_K and IQ4_XS

update CMakeList

update README

fix some compilation warning

fix compiler warning when amx is not enabled

minor change

ggml-ci

move ggml_amx_init from ggml.c to ggml-amx/mmq.cpp

ggml-ci

update CMakeLists with -mamx-tile, -mamx-int8 and -mamx-bf16

ggml-ci

add amx as an ggml-backend

update header file, the old path for immintrin.h has changed to ggml-cpu-impl.h

minor change

update CMakeLists.txt

minor change

apply weight prepacking in set_tensor method in ggml-backend

fix compile error

ggml-ci

minor change

ggml-ci

update CMakeLists.txt

ggml-ci

add march dependency

minor change

ggml-ci

change ggml_backend_buffer_is_host to return false for amx backend

ggml-ci

fix supports_op

use device reg for AMX backend

ggml-ci

minor change

ggml-ci

minor change

fix rebase

set .buffer_from_host_ptr to be false for AMX backend
2024-11-01 10:19:05 +02:00
28b044dad9 vulkan : add backend registry / device interfaces (llama/9721)
* vulkan : add backend registry / device interfaces

* llama : print devices used on model load
2024-11-01 10:19:05 +02:00
b8f11a0a17 fix: allocating CPU buffer with size 0 (llama/9917) 2024-11-01 10:19:05 +02:00
ff5a838099 fix: use vm_allocate to allocate CPU backend buffer on macOS (llama/9875)
* fix: use `vm_allocate` to allocate CPU backend buffer on macOS

* fix: switch to `posix_memalign` to keep existing `free()` usages work

* feat: move `GGML_ALIGNED_MALLOC` to `ggml-backend-impl.h`, add support for `vm_allocate` on macOS

* style: formatting

* fix: move const outside of `#ifndef`

* style: formatting

* fix: unused var

* fix: transform `GGML_ALIGNED_MALLOC` and `GGML_ALIGNED_FREE` into functions and add them to `ggml-impl.h`

* fix: unused var

* fix: page align to `GGUF_DEFAULT_ALIGNMENT`

* fix: page align to `TENSOR_ALIGNMENT`

* fix: convert `TENSOR_ALIGNMENT` to a macro

* fix: increase page size to `32` on iOS

* fix: iOS page size

* fix: `hbw_posix_memalign` alignment
2024-11-01 10:19:05 +02:00
84713613be CUDA: fix 1D im2col, add tests (ggml/993) 2024-11-01 10:19:05 +02:00
ded89c9d08 Fix cann compilation error (llama/9891)
Fix cann compilation error after merging llama.cpp supports dynamically loadable backends.
2024-11-01 10:19:05 +02:00
042e95d92f Vectorize load instructions in dmmv f16 CUDA kernel (llama/9816)
* Vectorize load instructions in dmmv f16 CUDA kernel

Replaces scalar with vector load instructions, which substantially
improves performance on NVIDIA HBM GPUs, e.g. gives a 1.27X overall
speedup for Meta-Llama-3-8B-Instruct-F16 BS1 inference evaluation on
H100 SXM 80GB HBM3. On GDDR GPUs, there is a slight (1.01X) speedup.

* addressed comment

* Update ggml/src/ggml-cuda/dmmv.cu

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2024-11-01 10:19:05 +02:00
81110c0174 ggml : move more prints to the ggml log system (llama/9839)
* ggml : move more prints to the ggml log system

* show BLAS OpenMP warnings in all builds using debug print
2024-11-01 10:19:05 +02:00
c313723860 rpc : add backend registry / device interfaces (llama/9812)
* rpc : add backend registry / device interfaces

* llama : add llama_supports_rpc API

* ggml_backend_rpc_start_rpc_server -> ggml_backend_rpc_start_server
2024-11-01 10:19:05 +02:00
e69b2371e2 musa: add docker image support (llama/9685)
* mtgpu: add docker image support

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

* mtgpu: enable docker workflow

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
2024-11-01 10:19:05 +02:00
1531259b2c ggml : fix BLAS with unsupported types (llama/9775)
* ggml : do not use BLAS with types without to_float

* ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies

* ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits

it's not really internal if everybody uses it
2024-11-01 10:19:05 +02:00
44bc2767fd ggml : add backend registry / device interfaces to BLAS backend (llama/9752)
* ggml : add backend registry / device interfaces to BLAS backend

* fix mmap usage when using host buffers
2024-11-01 10:19:05 +02:00
bd7ace7adc Update building for Android (llama/9672)
* docs : clarify building Android on Termux

* docs : update building Android on Termux

* docs : add cross-compiling for Android

* cmake : link dl explicitly for Android
2024-11-01 10:19:05 +02:00
315364d7de ggml : add metal backend registry / device (llama/9713)
* ggml : add metal backend registry / device

ggml-ci

* metal : fix names [no ci]

* metal : global registry and device instances

ggml-ci

* cont : alternative initialization of global objects

ggml-ci

* llama : adapt to backend changes

ggml-ci

* fixes

* metal : fix indent

* metal : fix build when MTLGPUFamilyApple3 is not available

ggml-ci

* fix merge

* metal : avoid unnecessary singleton accesses

ggml-ci

* metal : minor fix [no ci]

* metal : g_state -> g_ggml_ctx_dev_main [no ci]

* metal : avoid reference of device context in the backend context

ggml-ci

* metal : minor [no ci]

* metal : fix maxTransferRate check

* metal : remove transfer rate stuff

---------

Co-authored-by: slaren <slarengh@gmail.com>
2024-11-01 10:19:05 +02:00
80753d4da8 metal : single allocation of encode_async block (llama/9747)
* Single allocation of encode_async block with non-ARC capture in ggml-metal.m

* Moving Block_release to the deallocation code

* Release encode block when re-setting encoding buffer count if needed

* Update ggml/src/ggml-metal.m

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-11-01 10:19:05 +02:00
8f9bdca4c4 ggml-alloc : remove buffer_id from leaf_alloc (ggml/987)
This commit removes the buffer_id field from the leaf_alloc struct.

The motivation for is that this field is only written to and never
read/used as far as I can tell. Each tensor_alloc has a buffer_id field
and this is what caused me to look into this more closely, to
understand what the buffer_id in leaf_alloc was used for.
2024-11-01 10:19:05 +02:00
4e10afb5a9 scripts : sync amx 2024-10-31 22:13:24 +02:00
aa037a60f3 ggml : alloc ggml_contexts on the heap (#2525)
* whisper : reduce ggml_context usage

* ggml : allocate contexts on the heap (v2)

* ggml : aligned malloc -> malloc
2024-10-31 22:00:09 +02:00
19dca2bb14 ci : fix openblas build (#2511)
* ci : fix openblas build

* cont : would this work?

* ci : I'm sorry, windows

* cont : disabled wrong build

* ci : fix openblas build with pkgconfiglite (#2517)

- choco install pkgconfiglite (vcpkg-pkgconf doesn't contain pkg-config executable?)
- vcpkg install openblas (otherwise it is not detected now)

---------

Co-authored-by: Tamotsu Takahashi <ttakah+github@gmail.com>
2024-10-30 12:58:26 +02:00
55e422109b scripts : add turbo-q8_0 to the benchmark 2024-10-29 19:37:24 +02:00
3f020fac9d whisper : minor compile warning 2024-10-29 19:30:26 +02:00
1626b73b03 whisper : move new-segment callback after DTW step (#2515) 2024-10-29 08:47:21 +02:00
850f7b19d3 ruby : fix installation test (#2519) 2024-10-29 08:45:37 +02:00
d4bc413505 ruby : add more APIs (#2518)
* Add test for built package existence

* Add more tests for Whisper::Params

* Add more Whisper::Params attributes

* Add tests for callbacks

* Add progress and abort callback features

* [skip ci] Add prompt usage in README

* Change prompt text in example
2024-10-28 19:23:23 +02:00
fc49ee4479 ruby : support new-segment callback (#2506)
* Add Params#new_segment_callback= method

* Add tests for Params#new_segment_callback=

* Group tests for #transcribe

* Don't use static for thread-safety

* Set new_segment_callback only when necessary

* Remove redundant check

* [skip ci] Add Ruby version README

* Revert "Group tests for #transcribe"

This reverts commit 71b65b00cc.

* Revert "Add tests for Params#new_segment_callback="

This reverts commit 81e6df3bab.

* Add test for Context#full_n_segments

* Add Context#full_n_segments

* Add tests for lang API

* Add lang API

* Add tests for Context#full_lang_id API

* Add Context#full_lang_id

* Add abnormal test cases for lang

* Raise appropriate errors from lang APIs

* Add tests for Context#full_get_segment_t{0,1} API

* Add Context#full_get_segment_t{0,1}

* Add tests for Context#full_get_segment_speaker_turn_next API

* Add Context#full_get_segment_speaker_turn_next

* Add tests for Context#full_get_segment_text

* Add Context#full_get_setgment_text

* Add tests for Params#new_segment_callback=

* Run new segment callback

* Split tests to multiple files

* Use container struct for new segment callback

* Add tests for Params#new_segment_callback_user_data=

* Add Whisper::Params#new_user_callback_user_data=

* Add GC-related test for new segment callback

* Protect new segment callback related structs from GC

* Add meaningful test for build

* Rename: new_segment_callback_user_data -> new_segment_callback_container

* Add tests for Whisper::Segment

* Add Whisper::Segment and Whisper::Context#each_segment

* Extract c_ruby_whisper_callback_container_allocate()

* Add test for Whisper::Params#on_new_segment

* Add Whisper::Params#on_new_egment

* Assign symbol IDs to variables

* Make extsources.yaml simpler

* Update README

* Add document comments

* Add test for calling Whisper::Params#on_new_segment multiple times

* Add file dependencies to GitHub actions config and .gitignore

* Add more files to ext/.gitignore
2024-10-28 15:43:27 +02:00
c0ea41f6b2 ruby : add Metal support (#2516) 2024-10-28 13:08:09 +02:00
0fbaac9c89 whisper : fix index overflow in token-level timestamp logic (#2505) 2024-10-23 15:14:03 +03:00
a5abfe6a90 readme : update links and make commands (#2489)
* Update links to headers in README.md

* Add link to Vulkan section in README.md

* Add "-j" for parallelism for "make" in README.md

* Update README.md
2024-10-17 13:25:18 +03:00
d3f7137cc9 ruby : fix bindings (#2484)
* Improve Rakefile

* Remove intermediate files

* Remove unnecessary manipulations from extconf.rb

* Add README and LINCENSE to source files

* Manage ext source files using YAML file

* Use extsources.yaml to include files into gem package file

* Add git-managed source files to build dependency

* Add test task

* Download model for test if not exists

* Add test for build

* Ignore gem package directory

* Enable GitHub action for Ruby binding

* Fix model name

* Build lib file for test

* Use extension for each platform

* Use extension for each platform on testing

* Move built lib file rather than copy

* Add intermediate files to clean targets
2024-10-16 18:44:04 +03:00
f7c99e49b3 readme : add Vulkan notice (#2488)
* Add Vulkan notice in README.md

* Fix formatting for Vulkan section in README.md

* Fix formatting in README.md
2024-10-16 18:43:26 +03:00
1d5752fa42 make : fix GGML_VULKAN=1 build (#2485) 2024-10-16 18:42:47 +03:00
b6049060dd whisper : add dtw preset for large-v3-turbo (#2481) 2024-10-15 21:00:21 +03:00
06a1da9daf convert : handle max_target_positions (#2477)
as needed eg for
https://huggingface.co/primeline/whisper-large-v3-turbo-german/blob/main/config.json
2024-10-14 10:46:33 +03:00
746d173592 readme : update the Quick Start section (#2475)
navigating into the directory
2024-10-14 10:44:57 +03:00
fdbfb460ed whisper : add OpenVINO init with state (#2464)
* Fixed OpenVino init on state

* Removed an empty line

* Fixed typo

* Replaced tabs with spaces

---------

Co-authored-by: Sandro Hanea <sandrohanea@users.noreply.github.com>
2024-10-08 20:08:00 +03:00
597 changed files with 223532 additions and 103796 deletions

View File

@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
ARG CUDA_DOCKER_ARCH=all
RUN apt-get update && \
apt-get install -y build-essential git cmake libsdl2-dev
apt-get install -y build-essential git cmake libsdl2-dev wget git
WORKDIR /app
@ -23,6 +23,6 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV GGML_CUDA=1
RUN make
RUN make base.en
ENTRYPOINT ["/app/main"]

View File

@ -17,7 +17,7 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
ENV GGML_CUDA=1
RUN apt-get update && \
apt-get install -y build-essential libsdl2-dev \
apt-get install -y build-essential libsdl2-dev wget cmake git \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
# Ref: https://stackoverflow.com/a/53464012
@ -25,7 +25,7 @@ ENV CUDA_MAIN_VERSION=12.3
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
COPY .. .
RUN make
RUN make base.en
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
ENV CUDA_MAIN_VERSION=12.3
@ -33,7 +33,7 @@ ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
WORKDIR /app
RUN apt-get update && \
apt-get install -y curl ffmpeg \
apt-get install -y curl ffmpeg wget cmake git \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
COPY --from=build /app /app

View File

@ -2,17 +2,17 @@ FROM ubuntu:22.04 AS build
WORKDIR /app
RUN apt-get update && \
apt-get install -y build-essential \
apt-get install -y build-essential wget cmake git \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
COPY .. .
RUN make
RUN make base.en
FROM ubuntu:22.04 AS runtime
WORKDIR /app
RUN apt-get update && \
apt-get install -y curl ffmpeg libsdl2-dev \
apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
COPY --from=build /app /app

View File

@ -10,8 +10,8 @@ on:
- whisper.h
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
ubuntu-22:
runs-on: ubuntu-22.04
steps:
- uses: actions/setup-go@v5
with:

65
.github/workflows/bindings-ruby.yml vendored Normal file
View File

@ -0,0 +1,65 @@
name: Bindings Tests (Ruby)
on:
push:
paths:
- bindings/ruby/**
- src/**/*.c
- src/**/*.cpp
- src/**/*.h
- src/**/*.m
- src/**/*.metal
- include/**/*.c
- include/**/*.cpp
- include/**/*.h
- include/**/*.m
- include/**/*.metal
- ggml/**/*.c
- ggml/**/*.cpp
- ggml/**/*.h
- ggml/**/*.m
- ggml/**/*.metal
- scripts/get-flags.mk
- examples/common.h
- examples/common.cpp
- examples/common-whisper.h
- examples/common-whisper.cpp
- examples/stb_vorbis.c
- examples/miniaudio.h
pull_request:
paths:
- bindings/ruby/**
- src/**/*.c
- src/**/*.cpp
- src/**/*.h
- src/**/*.m
- src/**/*.metal
- include/**/*.c
- include/**/*.cpp
- include/**/*.h
- include/**/*.m
- include/**/*.metal
- ggml/**/*.c
- ggml/**/*.cpp
- ggml/**/*.h
- ggml/**/*.m
- ggml/**/*.metal
- scripts/get-flags.mk
- examples/common.h
- examples/common.cpp
- examples/common-whisper.h
- examples/common-whisper.cpp
- examples/stb_vorbis.c
- examples/miniaudio.h
jobs:
ubuntu-22:
runs-on: ubuntu-22.04
defaults:
run:
working-directory: bindings/ruby
steps:
- uses: ruby/setup-ruby@v1
with:
ruby-version: '3.1'
- uses: actions/checkout@v4
- run: rake test

View File

@ -1,23 +0,0 @@
# TODO: fix this workflow file, disabled for now
name: Bindings Tests (Ruby)
on:
push:
paths:
- bindings/ruby/**
- whisper.h
pull_request:
paths:
- bindings/ruby/**
- whisper.h
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
steps:
- uses: ruby/setup-ruby@v1
with:
ruby-version: '3.0'
- uses: actions/checkout@v1
- run: |
cd bindings/ruby/ext
ruby extconf.rb && make

View File

@ -1,17 +1,28 @@
name: CI
on: [push, pull_request]
on:
push:
branches:
- master
pull_request:
types: [opened, synchronize, reopened]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
ubuntu_image: "ubuntu:22.04"
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
ubuntu-22:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
arch: [linux/amd64, linux/ppc64le]
steps:
- name: Clone
@ -27,53 +38,129 @@ jobs:
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential libsdl2-dev
make
make stream'
apt install -y build-essential libsdl2-dev cmake git
cmake -B build
cmake --build build --config Release -j $(nproc)'
ubuntu-22-arm64:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
arch: [linux/arm64]
steps:
- name: Clone
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build ${{ matrix.arch }}
run: |
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential libsdl2-dev cmake git
cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
cmake --build build --config Release -j $(nproc)'
ubuntu-22-arm-v7:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
arch: [linux/arm/v7]
steps:
- name: Clone
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build ${{ matrix.arch }}
run: |
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential libsdl2-dev cmake git
cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
cmake --build build --config Release -j $(nproc)'
macOS-latest:
runs-on: macOS-latest
strategy:
matrix:
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: macOS-latest-swift
evict-old-files: 1d
- name: Dependencies
run: |
brew update
brew install sdl2
brew install sdl2 cmake
- name: Build
run: |
make
make stream
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DWHISPER_BUILD_EXAMPLES=OFF \
-DWHISPER_BUILD_TESTS=OFF \
-DWHISPER_BUILD_SERVER=OFF \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
freeBSD-latest:
runs-on: macos-12
- name: xcodebuild for swift package
id: xcodebuild
run: |
./build-xcframework.sh
steps:
- name: Clone
uses: actions/checkout@v4
- name: Build
uses: cross-platform-actions/action@v0.24.0
with:
operating_system: freebsd
version: '13.3'
run: |
sudo pkg update
sudo pkg install -y gmake sdl2
gmake
gmake stream
# freeBSD-latest:
# runs-on: macos-12
#
# steps:
# - name: Clone
# uses: actions/checkout@v4
#
# - name: Build
# uses: cross-platform-actions/action@v0.24.0
# with:
# operating_system: freebsd
# version: '13.3'
# run: |
# sudo pkg update
# sudo pkg install -y gmake sdl2 cmake
# cmake -B build
# cmake --build build --config Release
ubuntu-latest-gcc:
runs-on: ubuntu-latest
ubuntu-22-gcc:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
build: [Debug, Release]
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
arch: [linux/amd64, linux/ppc64le]
steps:
- name: Clone
@ -89,13 +176,69 @@ jobs:
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev
apt install -y build-essential cmake libsdl2-dev git
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
make
ctest -L gh --output-on-failure'
ubuntu-latest-clang:
runs-on: ubuntu-latest
ubuntu-22-gcc-arm64:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
build: [Debug, Release]
arch: [linux/arm64]
steps:
- name: Clone
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build ${{ matrix.arch }}
run: |
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev git
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
make
ctest -L gh --output-on-failure'
ubuntu-22-gcc-arm-v7:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
build: [Debug, Release]
arch: [linux/arm/v7]
steps:
- name: Clone
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Build ${{ matrix.arch }}
run: |
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev git
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
make
ctest -L gh --output-on-failure'
ubuntu-22-clang:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
@ -120,13 +263,13 @@ jobs:
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y clang build-essential cmake libsdl2-dev
apt install -y clang build-essential cmake libsdl2-dev git
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
make
ctest -L gh --output-on-failure'
ubuntu-latest-gcc-sanitized:
runs-on: ubuntu-latest
ubuntu-22-gcc-sanitized:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
@ -148,7 +291,7 @@ jobs:
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake
apt install -y build-essential cmake git
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
make
ctest -L gh --output-on-failure'
@ -183,12 +326,12 @@ jobs:
shell: bash
run: |
sudo apt update
sudo apt install intel-oneapi-compiler-dpcpp-cpp
sudo apt install intel-oneapi-compiler-dpcpp-cpp git
- name: install oneAPI MKL library
shell: bash
run: |
sudo apt install intel-oneapi-mkl-devel
sudo apt install intel-oneapi-mkl-devel git
- name: Clone
id: checkout
@ -233,7 +376,7 @@ jobs:
shell: bash
run: |
sudo apt update
sudo apt install intel-oneapi-compiler-dpcpp-cpp
sudo apt install intel-oneapi-compiler-dpcpp-cpp git
- name: install oneAPI MKL library
shell: bash
@ -274,30 +417,16 @@ jobs:
msystem: ${{matrix.sys}}
install: >-
base-devel
git
mingw-w64-${{matrix.env}}-toolchain
mingw-w64-${{matrix.env}}-cmake
mingw-w64-${{matrix.env}}-SDL2
mingw-w64-${{matrix.env}}-openblas
- name: Build using make
shell: msys2 {0}
run: |
make -j $(nproc)
- name: Clean after building using make
shell: msys2 {0}
run: |
make clean
- name: Build using make w/ OpenBLAS
shell: msys2 {0}
run: |
make GGML_OPENBLAS=1 -j $(nproc)
- name: Build using CMake
shell: msys2 {0}
run: |
cmake -B build
cmake -B build -DWHISPER_SDL2=ON
cmake --build build --config ${{ matrix.build }} -j $(nproc)
- name: Clean after building using CMake
@ -308,7 +437,7 @@ jobs:
- name: Build using CMake w/ OpenBLAS
shell: msys2 {0}
run: |
cmake -B build -DGGML_OPENBLAS=ON
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config ${{ matrix.build }} -j $(nproc)
windows:
@ -382,10 +511,8 @@ jobs:
sdl2: [ON]
include:
- arch: Win32
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
s2arc: x86
- arch: x64
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
s2arc: x64
- sdl2: ON
s2ver: 2.28.5
@ -394,17 +521,21 @@ jobs:
- name: Clone
uses: actions/checkout@v4
- name: Export GitHub Actions cache environment variables
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v2
- name: Fetch OpenBLAS
- name: Install OpenBLAS and pkgconfiglite
if: matrix.blas == 'ON'
run: |
C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
7z x blas.zip -oblas -y
copy blas/include/cblas.h .
copy blas/include/openblas_config.h .
echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
vcpkg install --triplet=${{ matrix.s2arc }}-windows openblas
choco install pkgconfiglite
- name: Fetch SDL2 and set SDL2_DIR
if: matrix.sdl2 == 'ON'
@ -416,9 +547,10 @@ jobs:
- name: Configure
run: >
cmake -S . -B ./build -A ${{ matrix.arch }}
-DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
-DGGML_OPENBLAS=${{ matrix.blas }}
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
-DGGML_BLAS=${{ matrix.blas }}
-DGGML_BLAS_VENDOR=OpenBLAS
-DWHISPER_SDL2=${{ matrix.sdl2 }}
- name: Build
@ -426,9 +558,9 @@ jobs:
cd ./build
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
- name: Copy libopenblas.dll
- name: Copy openblas.dll
if: matrix.blas == 'ON'
run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
run: copy "C:/vcpkg/packages/openblas_${{ matrix.s2arc }}-windows/bin/openblas.dll" build/bin/${{ matrix.build }}
- name: Copy SDL2.dll
if: matrix.sdl2 == 'ON'
@ -443,7 +575,6 @@ jobs:
windows-cublas:
runs-on: windows-2019
strategy:
matrix:
build: [Release]
@ -453,12 +584,10 @@ jobs:
cuda-toolkit: [12.2.0, 11.8.0]
include:
- arch: x64
s2arc: x64
- sdl2: ON
s2ver: 2.28.5
sdl2: ON
sdl2_ver: 2.28.5
steps:
- name: Clone
- name: Clone repository
uses: actions/checkout@v4
- name: Add msbuild to PATH
@ -470,45 +599,50 @@ jobs:
with:
cuda: '${{ matrix.cuda-toolkit }}'
- name: Install 7-Zip
run: choco install 7zip -y
- name: Fetch SDL2 and set SDL2_DIR
if: matrix.sdl2 == 'ON'
run: |
C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip
7z x sdl2.zip
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
- name: Configure
run: >
cmake -S . -B ./build -A ${{ matrix.arch }}
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
-DGGML_CUDA=${{ matrix.cublas }}
-DWHISPER_SDL2=${{ matrix.sdl2 }}
- name: Configure CMake
shell: cmd
run: |
cmake -S . -B ./build -A ${{ matrix.arch }} ^
-DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
-DGGML_CUDA=${{ matrix.cublas }} ^
-DCMAKE_CUDA_ARCHITECTURES=all ^
-DWHISPER_SDL2=${{ matrix.sdl2 }} ^
-DSDL2_DIR="%SDL2_DIR%"
- name: Build ${{ matrix.cuda-toolkit }}
- name: Build Project
shell: cmd
run: |
cd ./build
cmake --build . --config ${{ matrix.build }}
- name: Copy CUDA DLLs
run: >
Copy-Item -PassThru
-Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
-Include cudart64_*,cublas64_*,cublasLt64_*
-Destination build/bin/${{ matrix.build }}
run: |
Get-ChildItem "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/" -Filter "*.dll" |
Copy-Item -Destination "build/bin/${{ matrix.build }}"
- name: Copy SDL2.dll
if: matrix.sdl2 == 'ON'
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }}
- name: Upload binaries
if: matrix.sdl2 == 'ON'
uses: actions/upload-artifact@v4
with:
name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
path: build/bin/${{ matrix.build }}
emscripten:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
strategy:
matrix:
@ -529,7 +663,7 @@ jobs:
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
make
ios:
ios-xcode-build:
runs-on: macos-latest
strategy:
@ -537,7 +671,7 @@ jobs:
build: [Release]
steps:
- name: Clone
- name: Checkout code
uses: actions/checkout@v4
- name: Configure
@ -545,14 +679,36 @@ jobs:
cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
mkdir models/ggml-base.en-encoder.mlmodelc
- name: Build
id: cmake_build
run: |
sysctl -a
mkdir build
cd build
cmake -G Xcode .. \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DWHISPER_BUILD_EXAMPLES=OFF \
-DWHISPER_BUILD_TESTS=OFF \
-DWHISPER_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
- name: xcodebuild for swift package
id: xcodebuild
run: |
./build-xcframework.sh
- name: Build objc example
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphonesimulator build
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
- name: Build swiftui example
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphonesimulator build
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
android:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Clone
@ -560,12 +716,6 @@ jobs:
with:
path: whisper
- name: Clone
uses: actions/checkout@v4
with:
repository: ggerganov/ggml
path: ggml
- name: Install Java
uses: actions/setup-java@v4
with:
@ -584,11 +734,11 @@ jobs:
run: |
export PATH_TO_GGML=$PWD/ggml
cd whisper/examples/whisper.android
./gradlew assembleRelease --no-daemon -PGGML_HOME=$PATH_TO_GGML
./gradlew assembleRelease --no-daemon
# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
# android_java:
# runs-on: ubuntu-latest
# runs-on: ubuntu-22.04
#
# steps:
# - name: Clone
@ -657,7 +807,7 @@ jobs:
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
quantize:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Clone
@ -666,5 +816,6 @@ jobs:
- name: Test quantize
run: |
./models/download-ggml-model.sh tiny.en
make quantize
./quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
cmake -B build
cmake --build build --config Release
./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0

View File

@ -11,13 +11,13 @@ jobs:
name: Push Docker image to Docker Hub
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
env:
COMMIT_SHA: ${{ github.sha }}
strategy:
matrix:
config:
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
#TODO: the cuda image keeps failing - disable for now
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
@ -28,6 +28,8 @@ jobs:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@ -45,7 +47,7 @@ jobs:
with:
context: .
push: true
platforms: ${{ matrix.config.platforms }}
platforms: ${{ matrix.config.platform }}
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
file: ${{ matrix.config.dockerfile }}
@ -54,6 +56,6 @@ jobs:
with:
context: .
push: ${{ github.event_name == 'push' }}
platforms: ${{ matrix.config.platforms }}
platforms: ${{ matrix.config.platform }}
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
file: ${{ matrix.config.dockerfile }}

View File

@ -10,8 +10,8 @@ on:
- whisper.h
jobs:
addon_node-ubuntu-latest:
runs-on: ubuntu-latest
addon_node-ubuntu-22:
runs-on: ubuntu-22.04
strategy:
matrix:
node-version: [ 16.x, 18.x ]
@ -22,7 +22,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential
sudo apt-get install build-essential git
sudo apt-get install cmake
sudo apt-get install libsdl2-dev

6
.gitignore vendored
View File

@ -1,5 +1,6 @@
*.o
*.a
*.d
.cache/
.coreml/
.test/
@ -19,6 +20,9 @@ build-*/
.swiftpm
*.metallib
ggml-metal-embed.metal
ggml-metal-embed.metal.tmp
/main
/stream
/command
@ -54,3 +58,5 @@ cmake-build-debug/
.cxx/
.gradle/
local.properties
.log
.exe

211
AUTHORS
View File

@ -1,34 +1,51 @@
# date: Tue Apr 9 20:27:03 EEST 2024
# date: Tue Feb 4 13:03:35 EET 2025
# this file is auto-generated by scripts/gen-authors.sh
0/0 <zero@imaskeleton.me>
0cc4m <picard12@live.de>
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
65a <10104049+65a@users.noreply.github.com>
AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
AT <manyoso@users.noreply.github.com>
Aarni Koskela <akx@iki.fi>
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Aaron Taylor <aaron@exphat.com>
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
Adam Jones <domdomegg+git@gmail.com>
Adrien Gallouët <adrien@gallouet.fr>
Adrien Gallouët <angt@huggingface.co>
AfryMask <AfryMask@163.com>
Ahmad Bilal <ahmad.bilal@empglabs.com>
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
AidanBeltonS <aidan.belton@codeplay.com>
Akarshan Biswas <akarshan.biswas@gmail.com>
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
Akash Mahajan <akash7190@gmail.com>
Akash Mahajan <akashmjn@stanford.edu>
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
Alan <unknown>
Albert Jin <albert.jin@gmail.com>
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
Alex Azarov <alex@azarov.by>
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
Alex Evgrashin <aevgrashin@yandex.ru>
Alex O'Connell <35843486+acon96@users.noreply.github.com>
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
Alexandru Mariuti <alex@mariuti.com>
Alexey Kharlamov <alexey@kharlamov.biz>
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
Ali Alameh <ali.alameh@isae.edu.lb>
Alter <0x7c48@gmail.com>
Ananta Bastola <anantarajbastola@gmail.com>
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
Andreas Lubbe <git@lubbe.org>
Andreu Huguet <andreuhuguet@gmail.com>
Andrew Huynh <a5thuynh@gmail.com>
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
Andrew S <andrews54757@gmail.com>
Andy Maloney <asmaloney@gmail.com>
Anton Kostin <masguit42@users.noreply.github.com>
@ -40,8 +57,11 @@ AustinMroz <austinmroz@utexas.edu>
Avik Sengupta <avik@sengupta.net>
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
Baffin Lee <baffinlee@gmail.com>
Ben Ashbaugh <ben.ashbaugh@intel.com>
Ben Nortier <bjnortier@gmail.com>
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
Binozo <70137898+Binozo@users.noreply.github.com>
Bo-Yi Wu <appleboy.tw@gmail.com>
Boris Bliznioukov <blib@mail.com>
Borislav Stanimirov <b.stanimirov@abv.bg>
@ -49,47 +69,86 @@ Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
Brian Murray <brian@bmurray.ca>
CRD716 <crd716@gmail.com>
Canis Lupus <Canis-UK@users.noreply.github.com>
Carlos Zoido <mrgalleta@gmail.com>
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
CarterLi999 <664681047@qq.com>
ChangSeok Oh <shivamidow@users.noreply.github.com>
Changyeon Kim <cyzero.kim@samsung.com>
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
Charles Xu <63788048+chaxu01@users.noreply.github.com>
Charles Xu <charles.xu@arm.com>
Chen Xi <xi2.chen@intel.com>
Chen Xi <xixichen08@foxmail.com>
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
Chidi Williams <williamschidi1@gmail.com>
Chris Elrod <elrodc@gmail.com>
Christian <12550267+iceychris@users.noreply.github.com>
Christian Kastner <ckk@kvr.at>
Clifford Heath <clifford.heath@gmail.com>
Clint Herron <hanclinto@gmail.com>
Colin <github@whoisc.cc>
Conrad Kramer <conrad@conradkramer.com>
Corey Earwood <iamcgn+github@gmail.com>
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
DAN™ <dranger003@gmail.com>
DGdev91 <DGdev91@users.noreply.github.com>
Damian Czaja <trojan295@protonmail.com>
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
Dan Johansson <dan.johansson@arm.com>
Daniel Bevenius <daniel.bevenius@gmail.com>
Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
Daniel Ziegenberg <daniel@ziegenberg.at>
Daniele <57776841+daniandtheweb@users.noreply.github.com>
Dave <dave-fl@users.noreply.github.com>
Dave Airlie <airlied@gmail.com>
Dave Airlie <airlied@redhat.com>
Daven Sanassy <daven@vochlea.co.uk>
David <dnhkng@gmail.com>
David Thorpe <djt@mutablelogic.com>
DavidKorczynski <david@adalogics.com>
Davidson Francis <davidsondfgl@gmail.com>
Dener Stassun <denerstassun@gmail.com>
Dibakar Gope <dibakar.gope@arm.com>
Didzis Gosko <didzis@users.noreply.github.com>
Diego Devesa <slarengh@gmail.com>
Digipom <admin@digipom.com>
Dimo <dimo@ieee.org>
Djip007 <3705339+Djip007@users.noreply.github.com>
Djip007 <djip.perois@free.fr>
Dody Suria Wijaya <dodysw@gmail.com>
Dou Xinpeng <15529241576@163.com>
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
Duncan McConnell <ddmcconnell4@gmail.com>
Egor Egorov <me@egorfine.com>
Elkana Bardugo <ttv200@gmail.com>
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
Engininja2 <139037756+Engininja2@users.noreply.github.com>
Eric Curtin <ericcurtin17@gmail.com>
Eric Swanson <eswanson@alloscomp.com>
Eric Tendian <erictendian@gmail.com>
Eric Zhang <34133756+EZForever@users.noreply.github.com>
Erik Scholz <Green-Sky@users.noreply.github.com>
Evan Jones <evan.q.jones@gmail.com>
Evan Martin <evan.martin@gmail.com>
Eve <139727413+netrunnereve@users.noreply.github.com>
Evgeny Kuznetsov <evgeny@kuznetsov.md>
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
Fangjun Kuang <csukuangfj@gmail.com>
Felix <stenbackfelix@gmail.com>
Finn Voorhees <finnvoorhees@gmail.com>
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
Frankie Robertson <frankier@users.noreply.github.com>
Gang Chen <goncha@gmail.com>
Gavin Cai <gavin1818@hotmail.com>
George Hindle <george@georgehindle.com>
Georgi Gerganov <ggerganov@gmail.com>
Gilad S <7817232+giladgd@users.noreply.github.com>
Gilad S <giladgd@users.noreply.github.com>
Gilad S. <7817232+giladgd@users.noreply.github.com>
GitAritron <103900385+GitAritron@users.noreply.github.com>
GiviMAD <GiviMAD@users.noreply.github.com>
Gleicon Moraes <gleicon@gmail.com>
@ -98,41 +157,66 @@ Guillaume Wenzek <gwenzek@users.noreply.github.com>
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
Hang <bebound@gmail.com>
Haus1 <haus.xda@gmail.com>
Herman Semenov <GermanAizek@yandex.ru>
HimariO <dsfhe49854@gmail.com>
Hong Bo PENG <penghb@cn.ibm.com>
Hrishikesh Barman <geekodour@users.noreply.github.com>
Hugo <hugo@whynothugo.nl>
Ian Bicking <ian@ianbicking.org>
Ian Bull <irbull@eclipsesource.com>
Ihar Hrachyshka <ihrachys@redhat.com>
Ikko Ashimine <eltociear@gmail.com>
Ikko Eltociear Ashimine <eltociear@gmail.com>
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
Ivan <nekotekina@gmail.com>
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
Ivan Gorin <ivangorin21@gmail.com>
Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
JJ <103335846+computerscienceiscool@users.noreply.github.com>
Jack Mousseau <jmousseau@users.noreply.github.com>
JacobLinCool <jacoblincool@gmail.com>
Jakub Ráček <blizzcz@gmail.com>
Jared Van Bortel <jared@nomic.ai>
Jay Binks <jaybinks@gmail.com>
Jayant <jayantyadav202@gmail.com>
Jeff Bolz <jbolz@nvidia.com>
Jeroen Mostert <jeroen.mostert@cm.com>
Jhen-Jie Hong <developer@jhen.me>
Jhen-Jie Hong <iainst0409@gmail.com>
JidongZhang-THU <1119708529@qq.com>
Jo Liss <joliss42@gmail.com>
Joe Todd <joe.todd@codeplay.com>
Johan <jr.raffin@gmail.com>
Johannes Gäßler <johannesg@5d6.de>
John Balis <phobossystems@gmail.com>
JohnnyB <jboero@users.noreply.github.com>
Jonathan Soo <jcsoo@agora.com>
Jonno <1160532+razodactyl@users.noreply.github.com>
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
Jose <34888496+Jerry-Master@users.noreply.github.com>
Josh Bleecher Snyder <josharian@gmail.com>
Josscii <jossciiweiyi@gmail.com>
Judd <foldl@users.noreply.github.com>
Jumper775 <78500318+jumpers775@users.noreply.github.com>
Jun Hee Yoo <contact.jhyoo@gmail.com>
Junil Kim <logyourself@gmail.com>
Justina Cho <justcho5@gmail.com>
Justine Tunney <jtunney@gmail.com>
Justine Tunney <jtunney@mozilla.com>
KITAITI Makoto <KitaitiMakoto@gmail.com>
KP Kaiser <kirk@zothcorp.com>
Kamilake <exjang0@gmail.com>
Karol Kontny <82021046+kkontny@users.noreply.github.com>
Karthick <j.karthic2004@gmail.com>
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
Kasumi <90275229+kasumi-1@users.noreply.github.com>
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
Kendrick Taylor <kendrick@circuitsix.com>
Kevin Brothaler <admin@digipom.com>
Kevin Gibbons <bakkot@gmail.com>
Konosuke Sakai <konosuke@konosuke.work>
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
Kreijstal <rainb@tfwno.gf>
Kylin <56434533+KyL0N@users.noreply.github.com>
@ -147,56 +231,110 @@ Luis Herrera <herrera-luis@users.noreply.github.com>
Lukas Rist <glaslos@gmail.com>
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
M. Eren Akbiyik <erenakbiyik@gmail.com>
Ma Mingfei <mingfei.ma@intel.com>
Maciek <maciek.mab122@gmail.com>
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
Mark Karpelès <MagicalTux@users.noreply.github.com>
Mark Zhuang <zhuangqiubin@gmail.com>
Markus Tavenrath <mtavenrath@users.noreply.github.com>
Martin Delille <martin@delille.org>
Martin Warnaar <martinwarnaar@gmail.com>
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
Mathieu Baudier <mbaudier@argeo.org>
Mathijs de Bruin <mathijs@mathijsfietst.nl>
Matija Pevec <mightymatth@users.noreply.github.com>
Matt Stephenson <mstephenson6@users.noreply.github.com>
Max Krasnyansky <max.krasnyansky@gmail.com>
Max Krasnyansky <quic_maxk@quicinc.com>
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
Meng, Hengyu <hengyu.meng@intel.com>
Mengqing Cao <cmq0113@163.com>
Michael Podvitskiy <podvitskiymichael@gmail.com>
Michael Rienstra <mrienstra@gmail.com>
Mikhail Grigorev <sleuthhound@gmail.com>
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
Mohit Agarwal <mohit@sdf.org>
Molly Sophia <mollysophia379@gmail.com>
Murilo Santana <mvrilo@gmail.com>
NETZkultur GmbH <mulholland@netzkultur.de>
Natsu <chino@hotococoa.moe>
Neil Chudleigh <nchudleigh@users.noreply.github.com>
Neo Zhang <14088817+arthw@users.noreply.github.com>
Neo Zhang Jianyu <jianyu.zhang@intel.com>
Neuman Vong <neuman.vong@gmail.com>
Nicholai Tukanov <nicholaitukanov@gmail.com>
Nicholas Albion <nalbion@yahoo.com>
Nico Bosshard <nico@bosshome.ch>
Nicolò Scipione <nicolo.scipione@codeplay.com>
Niels Mayer <Niels.Mayer@gmail.com>
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
Nikolaj Olsson <nikse.dk@gmail.com>
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
Oleg Sidorov <me@whitebox.io>
Oleg Sidorov <oleg@sidorov.nl>
Olivier Chafik <ochafik@users.noreply.github.com>
Ondrej Kokes <ondrej.kokes@gmail.com>
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
PAB <pierreantoine.bannier@gmail.com>
Paul Tsochantaris <ptsochantaris@icloud.com>
Pedro Probst <pprobst@insiberia.net>
Peng <hzp1024@qq.com>
Peter <peter277@users.noreply.github.com>
Philipp Zabel <philipp.zabel@gmail.com>
Philippe Normand <phil@base-art.net>
Philippe Normand <philn@igalia.com>
Plamen Minev <pacominev@gmail.com>
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
Przemysław Pawełczyk <przemoc@gmail.com>
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
R0CKSTAR <xiaodong.ye@mthreads.com>
R0CKSTAR <yeahdongcn@gmail.com>
Radoslav Gerganov <rgerganov@gmail.com>
Radosław Gryta <radek.gryta@gmail.com>
Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
Raiya Araki <83504221+rai62@users.noreply.github.com>
Reinforce-II <fate@eastal.com>
Reinis Muiznieks <muiznieks.reinis@gmail.com>
RelatedTitle <r3latedtitle@gmail.com>
Rémy Oudompheng <oudomphe@phare.normalesup.org>
RhinoDevel <RhinoDevel@users.noreply.github.com>
Rich Jones <miserlou@gmail.com>
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
Robin <robin.xw@hotmail.com>
Roddur Dasgupta <roddurd@gmail.com>
Roland Rabien <figbug@gmail.com>
Romain Biessy <romain.biessy@codeplay.com>
Ronsor <ronsor@ronsor.pw>
Rotem Dan <rotemdan@gmail.com>
Ryan Hitchman <hitchmanr@gmail.com>
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
RyanChang <ftes90015@gmail.com>
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
SXX <sxx1136965276@gmail.com>
Sacha Arbonel <sacha.arbonel@hotmail.fr>
Salman Faroz <stsfaroz@gmail.com>
Salvatore Mesoraca <s.mesoraca16@gmail.com>
Sam <49637763+Onlyartist9@users.noreply.github.com>
Sam Pullara <spullara@gmail.com>
Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
Sergio López <slp@redhat.com>
Sergio López <slp@sinrega.org>
Shanshan Shen <467638484@qq.com>
Shijie <821898965@qq.com>
Shupei Fan <dymarkfan@outlook.com>
Siddharth Ramakrishnan <srr2141@columbia.edu>
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Simon Moisselin <simon.moisstoll@gmail.com>
Sindre Sorhus <sindresorhus@gmail.com>
Slava Primenko <primenko.s@gmail.com>
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
Stefan Sydow <s.sydow@heinlein-video.de>
Stefan Sydow <stefan@sydow.email>
Syahmi Azhar <prsyahmi@gmail.com>
Syed Jafri <syedjafri97@gmail.com>
Sơn Phan Trung <phantrungson17@gmail.com>
@ -205,37 +343,63 @@ Takeshi Inoue <inoue.takeshi@gmail.com>
Tamotsu Takahashi <ttakah+github@gmail.com>
Taras Glek <taras@thegp.com>
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
Thamster <Thamster@users.noreply.github.com>
Thijs Raymakers <thijs@raymakers.nl>
Thomas Fitzsimmons <fitzsim@fitzsim.org>
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
Tienshiao Ma <tienshiao@tienshiao.org>
Tim Miller <drasticactions@users.noreply.github.com>
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
Tobrun <tobrun.van.nuland@gmail.com>
Todd <taf2@users.noreply.github.com>
Toliver <teejae@gmail.com>
Tong Li <31761981+litongjava@users.noreply.github.com>
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
Topping1 <78745143+Topping1@users.noreply.github.com>
Travis Cline <travis.cline@gmail.com>
UEXTM.com <84163508+uextm@users.noreply.github.com>
UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
Vadim Peretokin <vperetokin@hey.com>
Valentin Gosu <1454649+valenting@users.noreply.github.com>
Vin Misra <vinith@alum.mit.edu>
Vulcan <93451215+trholding@users.noreply.github.com>
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
William Tambellini <william.tambellini@gmail.com>
William Tambellini <wtambellini@sdl.com>
Wilson Silva <wilson.dsigns@gmail.com>
Xiang (Kevin) Li <kevinli020508@gmail.com>
Xiao-Yong Jin <jinxiaoyong@gmail.com>
XiaotaoChen <chenxiaotao1234@gmail.com>
Xingchen Song(宋星辰) <xingchensong1996@163.com>
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
Xuan Son Nguyen <thichthat@gmail.com>
Yajing Tang <phillis@google.com>
Yang Shen <aplshenyang@gmail.com>
Yunès <jean.baptiste.yunes@free.fr>
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
Yusuf Redžić <48274562+redzic@users.noreply.github.com>
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
Zhiyuan Li <lizhiyuan@uniartisan.com>
Zhiyuan Li <uniartisan2017@gmail.com>
Zigfrid Zvezdin <ziggerZZ@gmail.com>
Zollner <24618122+Zolliner@users.noreply.github.com>
a3sh <38979186+A3shTnT@users.noreply.github.com>
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
agray3 <agray3@users.noreply.github.com>
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
aldorof <aldorof@users.noreply.github.com>
alonfaraj <alonfaraj@gmail.com>
amd-dwang <dong.wang@amd.com>
amritahs-ibm <amritahs@linux.vnet.ibm.com>
andypayne <apayne@gmail.com>
ardfork <134447697+ardfork@users.noreply.github.com>
arizhih <40765267+arizhih@users.noreply.github.com>
automaticcat <daogiatuank54@gmail.com>
bandoti <141645996+bandoti@users.noreply.github.com>
be-next <jerome.ramette@gmail.com>
bert hubert <bert@hubertnet.nl>
billyct <billy_allen@126.com>
bmwl <brian.marshall@tolko.com>
bobqianic <129547291+bobqianic@users.noreply.github.com>
bocytko <bocytko+github@gmail.com>
@ -248,7 +412,9 @@ byte-6174 <88070277+byte-6174@users.noreply.github.com>
cdosoftei <ciprian.dosoftei@gmail.com>
clach04 <Chris.Clark@actian.com>
compilade <113953597+compilade@users.noreply.github.com>
compilade <git@compilade.net>
conradg <conradjgodfrey@gmail.com>
crummyh <elijah@crums.us>
ddpasa <112642920+ddpasa@users.noreply.github.com>
denersc <denerstassun@gmail.com>
dscripka <dscripka@users.noreply.github.com>
@ -256,28 +422,55 @@ duthils <duthils@duthils.net>
ecneladis <ecneladis@users.noreply.github.com>
faker <nspyia2002@gmail.com>
fitzsim <fitzsim@fitzsim.org>
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
genevera (she/her) <genevera@users.noreply.github.com>
geniusnut <geniusnut@gmail.com>
gilbertgong <gilbert.gong@gmail.com>
gn64 <yukikaze.jp@gmail.com>
goldwaving <77494627+goldwaving@users.noreply.github.com>
greeshmay <greeshmay@gmail.com>
haopeng <657407891@qq.com>
hipudding <huafengchun@gmail.com>
hsinhoyeh <yhh92u@gmail.com>
hydai <z54981220@gmail.com>
iamthad <thadeus.j.fleming@gmail.com>
issixx <46835150+issixx@users.noreply.github.com>
james wolf <contractorwolf@hotmail.com>
jdomke <28772296+jdomke@users.noreply.github.com>
jettoblack <jettoblack@gmail.com>
jiez <373447296@qq.com>
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
jorismertz <35079666+jorismertz@users.noreply.github.com>
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
junkfood <69683722+JunkFood02@users.noreply.github.com>
jwijffels <jwijffels@bnosac.be>
k.h.lai <adrian.k.h.lai@outlook.com>
kamranjon <kamranjon@gmail.com>
katsu560 <katsu560oo-@docomo.ne.jp>
kennethge <57784063+kenneth-ge@users.noreply.github.com>
keyehzy <msamuel@aluno.puc-rio.br>
kunnis <kunnis@users.noreply.github.com>
l3utterfly <gc.pthzfoldr@gmail.com>
leejet <leejet714@gmail.com>
leo-pony <nengjunma@outlook.com>
lhez <quic_lih@quicinc.com>
litong <31761981+litongjava@users.noreply.github.com>
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
lnyan <lkwq007@gmail.com>
luoyu-intel <yu.luo@intel.com>
m.bell <m.bell@techsmith.com>
mahorozte <41834471+mahorozte@users.noreply.github.com>
mashizora <30516315+mashizora@users.noreply.github.com>
matt23654 <matthew.webber@protonmail.com>
matteo <matteogeniaccio@yahoo.it>
mgrachten <maarten@grachten.eu>
mkiol <mkiol@users.noreply.github.com>
mky_coder <47767389+mkycoder@users.noreply.github.com>
novag <7754358+novag@users.noreply.github.com>
pajowu <pajowu@pajowu.de>
pengxin99 <pengxin.yuan@intel.com>
petterreinholdtsen <pere-github@hungry.com>
polarmoon <90010972+polarmoon@users.noreply.github.com>
rlapray <lapray.romain@gmail.com>
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
@ -287,15 +480,31 @@ shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
slaren <slarengh@gmail.com>
slashlib <slashlib@users.noreply.github.com>
snadampal <87143774+snadampal@users.noreply.github.com>
someone13574 <81528246+someone13574@users.noreply.github.com>
st-gr <38470677+st-gr@users.noreply.github.com>
stduhpf <stephduh@live.fr>
stormofice <58337328+stormofice@users.noreply.github.com>
texmex76 <40733439+texmex76@users.noreply.github.com>
thefinaldegree <thefinaldegree@gmail.com>
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
toboil-features <160222185+toboil-features@users.noreply.github.com>
trixirt <trix@redhat.com>
ulatekh <ulatekh@yahoo.com>
undef <undefdev@gmail.com>
uvos <devnull@uvos.xyz>
uvos <philipp@uvos.xyz>
valVk <valVk@users.noreply.github.com>
venkr <venkateshrameshkumar+1@gmail.com>
vicalloy <zbirder@gmail.com>
wangshuai09 <391746016@qq.com>
woachk <24752637+woachk@users.noreply.github.com>
xctan <axunlei@gmail.com>
xdrudis <xavierdrudis@yahoo.es>
yuri@FreeBSD <yuri@FreeBSD>
zhangjixiong <code.zjx@gmail.com>
zhentaoyu <zhentao.yu@intel.com>
zhouwg <6889919+zhouwg@users.noreply.github.com>
zhouwg <zhouwg2000@gmail.com>
谢乃闻 <sienaiwun@users.noreply.github.com>
布客飞龙 <562826179@qq.com>
Артём Земляк <azemlyak@smart-consulting.ru>

View File

@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
project("whisper.cpp" C CXX)
project("whisper.cpp" VERSION 1.7.1)
project("whisper.cpp" VERSION 1.7.4)
include(CheckIncludeFileCXX)
set(SOVERSION 1)

1118
Makefile

File diff suppressed because it is too large Load Diff

View File

@ -1,60 +0,0 @@
// swift-tools-version:5.5
import PackageDescription
let package = Package(
name: "whisper",
platforms: [
.macOS(.v12),
.iOS(.v14),
.watchOS(.v4),
.tvOS(.v14)
],
products: [
.library(name: "whisper", targets: ["whisper"]),
],
targets: [
.target(
name: "whisper",
path: ".",
exclude: [
"bindings",
"cmake",
"coreml",
"examples",
"extra",
"models",
"samples",
"tests",
"CMakeLists.txt",
"Makefile"
],
sources: [
"ggml/src/ggml.c",
"src/whisper.cpp",
"ggml/src/ggml-aarch64.c",
"ggml/src/ggml-alloc.c",
"ggml/src/ggml-backend.cpp",
"ggml/src/ggml-quants.c",
"ggml/src/ggml-metal.m"
],
resources: [.process("ggml-metal.metal")],
publicHeadersPath: "spm-headers",
cSettings: [
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
.define("GGML_USE_ACCELERATE"),
.unsafeFlags(["-fno-objc-arc"]),
.define("GGML_USE_METAL")
// NOTE: NEW_LAPACK will required iOS version 16.4+
// We should consider add this in the future when we drop support for iOS 14
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
// .define("ACCELERATE_NEW_LAPACK"),
// .define("ACCELERATE_LAPACK_ILP64")
],
linkerSettings: [
.linkedFramework("Accelerate")
]
)
],
cxxLanguageStandard: .cxx11
)

381
README.md
View File

@ -7,21 +7,25 @@
[![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
Stable: [v1.7.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
> [!NOTE]
> New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788
Stable: [v1.7.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
- Plain C/C++ implementation without dependencies
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](https://github.com/ggerganov/whisper.cpp#core-ml-support)
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
- AVX intrinsics support for x86 architectures
- VSX intrinsics support for POWER architectures
- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
- Mixed F16 / F32 precision
- [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
- [Integer quantization support](#quantization)
- Zero memory allocations at runtime
- [Vulkan support](#vulkan-gpu-support)
- Support for CPU-only inference
- [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
- [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
- [Ascend NPU Support](https://github.com/ggerganov/whisper.cpp#ascend-npu-support)
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
- [OpenVINO Support](#openvino-support)
- [Ascend NPU Support](#ascend-npu-support)
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
Supported platforms:
@ -52,18 +56,6 @@ On Apple Silicon, the inference runs fully on the GPU via Metal:
https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
## Implementation details
- The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
- Sample usage is demonstrated in [main.cpp](examples/main)
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
- Various other examples are available in the [examples](examples) folder
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
## Quick start
First clone the repository:
@ -72,140 +64,38 @@ First clone the repository:
git clone https://github.com/ggerganov/whisper.cpp.git
```
Navigate into the directory:
```
cd whisper.cpp
```
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
```bash
sh ./models/download-ggml-model.sh base.en
```
Now build the [main](examples/main) example and transcribe an audio file like this:
Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
```bash
# build the main example
make
# build the project
cmake -B build
cmake --build build --config Release
# transcribe an audio file
./main -f samples/jfk.wav
./build/bin/whisper-cli -f samples/jfk.wav
```
---
For a quick demo, simply run `make base.en`:
```text
$ make base.en
cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main -framework Accelerate
./main -h
usage: ./main [options] file0.wav file1.wav ...
options:
-h, --help [default] show this help message and exit
-t N, --threads N [4 ] number of threads to use during computation
-p N, --processors N [1 ] number of processors to use during computation
-ot N, --offset-t N [0 ] time offset in milliseconds
-on N, --offset-n N [0 ] segment index offset
-d N, --duration N [0 ] duration of audio to process in milliseconds
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
-ml N, --max-len N [0 ] maximum segment length in characters
-sow, --split-on-word [false ] split on word rather than on token
-bo N, --best-of N [5 ] number of best candidates to keep
-bs N, --beam-size N [5 ] beam size for beam search
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
-tr, --translate [false ] translate from source language to english
-di, --diarize [false ] stereo audio diarization
-tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
-nf, --no-fallback [false ] do not use temperature fallback while decoding
-otxt, --output-txt [false ] output result in a text file
-ovtt, --output-vtt [false ] output result in a vtt file
-osrt, --output-srt [false ] output result in a srt file
-olrc, --output-lrc [false ] output result in a lrc file
-owts, --output-words [false ] output script for generating karaoke video
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
-ocsv, --output-csv [false ] output result in a CSV file
-oj, --output-json [false ] output result in a JSON file
-ojf, --output-json-full [false ] include more information in the JSON file
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-pp, --print-progress [false ] print progress
-nt, --no-timestamps [false ] do not print timestamps
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
-dl, --detect-language [false ] exit after automatically detecting language
--prompt PROMPT [ ] initial prompt
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
-ls, --log-score [false ] log best decoder scores of tokens
-ng, --no-gpu [false ] disable GPU
sh ./models/download-ggml-model.sh base.en
Downloading ggml model base.en ...
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
You can now use it like this:
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
===============================================
Running base.en on all samples in ./samples ...
===============================================
----------------------------------------------
[+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
----------------------------------------------
whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51864
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 512
whisper_model_load: n_text_head = 8
whisper_model_load: n_text_layer = 6
whisper_model_load: n_mels = 80
whisper_model_load: f16 = 1
whisper_model_load: type = 2
whisper_model_load: mem required = 215.00 MB (+ 6.00 MB per decoder)
whisper_model_load: kv self size = 5.25 MB
whisper_model_load: kv cross size = 17.58 MB
whisper_model_load: adding 1607 extra tokens
whisper_model_load: model ctx = 140.60 MB
whisper_model_load: model size = 140.54 MB
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
[00:00:00.000 --> 00:00:11.000] And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
whisper_print_timings: fallbacks = 0 p / 0 h
whisper_print_timings: load time = 113.81 ms
whisper_print_timings: mel time = 15.40 ms
whisper_print_timings: sample time = 11.58 ms / 27 runs ( 0.43 ms per run)
whisper_print_timings: encode time = 266.60 ms / 1 runs ( 266.60 ms per run)
whisper_print_timings: decode time = 66.11 ms / 27 runs ( 2.45 ms per run)
whisper_print_timings: total time = 476.31 ms
```
For a quick demo, simply run `make base.en`.
The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
For detailed usage instructions, run: `./main -h`
For detailed usage instructions, run: `./build/bin/whisper-cli -h`
Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
For example, you can use `ffmpeg` like this:
```bash
@ -217,7 +107,7 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
If you want some extra audio samples to play with, simply run:
```
make samples
make -j samples
```
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
@ -225,18 +115,18 @@ This will download a few more audio files from Wikipedia and convert them to 16-
You can download and run the other models as follows:
```
make tiny.en
make tiny
make base.en
make base
make small.en
make small
make medium.en
make medium
make large-v1
make large-v2
make large-v3
make large-v3-turbo
make -j tiny.en
make -j tiny
make -j base.en
make -j base
make -j small.en
make -j small
make -j medium.en
make -j medium
make -j large-v1
make -j large-v2
make -j large-v3
make -j large-v3-turbo
```
## Memory usage
@ -249,6 +139,20 @@ make large-v3-turbo
| medium | 1.5 GiB | ~2.1 GB |
| large | 2.9 GiB | ~3.9 GB |
## POWER VSX Intrinsics
`whisper.cpp` supports POWER architectures and includes code which
significantly speeds operation on Linux running on POWER9/10, making it
capable of faster-than-realtime transcription on underclocked Raptor
Talos II. Ensure you have a BLAS package installed, and replace the
standard cmake setup with:
```bash
# build with GGML_BLAS defined
cmake -B build -DGGML_BLAS=1
cmake --build build --config Release
./build/bin/whisper-cli [ .. etc .. ]
## Quantization
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
@ -258,11 +162,12 @@ Here are the steps for creating and using a quantized model:
```bash
# quantize a model with Q5_0 method
make quantize
./quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
cmake -B build
cmake --build build --config Release
./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
# run the examples as usual, specifying the quantized model file
./main -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
```
## Core ML support
@ -296,10 +201,6 @@ speed-up - more than x3 faster compared with CPU-only execution. Here are the in
- Build `whisper.cpp` with Core ML support:
```bash
# using Makefile
make clean
WHISPER_COREML=1 make -j
# using CMake
cmake -B build -DWHISPER_COREML=1
cmake --build build -j --config Release
@ -308,7 +209,7 @@ speed-up - more than x3 faster compared with CPU-only execution. Here are the in
- Run the examples as usual. For example:
```text
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
$ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
...
@ -392,7 +293,7 @@ This can result in significant speedup in encoder performance. Here are the inst
- Run the examples as usual. For example:
```text
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
$ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
...
@ -409,7 +310,7 @@ This can result in significant speedup in encoder performance. Here are the inst
The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
cached for the next run.
For more information about the Core ML implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
## NVIDIA GPU support
@ -419,8 +320,18 @@ First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-do
Now build `whisper.cpp` with CUDA support:
```
make clean
GGML_CUDA=1 make -j
cmake -B build -DGGML_CUDA=1
cmake --build build -j --config Release
```
## Vulkan GPU support
Cross-vendor solution which allows you to accelerate workload on your GPU.
First, make sure your graphics card driver provides support for Vulkan API.
Now build `whisper.cpp` with Vulkan support:
```
cmake -B build -DGGML_VULKAN=1
cmake --build build -j --config Release
```
## BLAS CPU support via OpenBLAS
@ -431,28 +342,13 @@ First, make sure you have installed `openblas`: https://www.openblas.net/
Now build `whisper.cpp` with OpenBLAS support:
```
make clean
GGML_OPENBLAS=1 make -j
```
## BLAS CPU support via Intel MKL
Encoder processing can be accelerated on the CPU via the BLAS compatible interface of Intel's Math Kernel Library.
First, make sure you have installed Intel's MKL runtime and development packages: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html
Now build `whisper.cpp` with Intel MKL BLAS support:
```
source /opt/intel/oneapi/setvars.sh
mkdir build
cd build
cmake -DWHISPER_MKL=ON ..
WHISPER_MKL=1 make -j
cmake -B build -DGGML_BLAS=1
cmake --build build -j --config Release
```
## Ascend NPU support
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
First, check if your Ascend NPU device is supported:
@ -466,16 +362,14 @@ Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/
Now build `whisper.cpp` with CANN support:
```
mkdir build
cd build
cmake .. -D GGML_CANN=on
make -j
cmake -B build -DGGML_CANN=1
cmake --build build -j --config Release
```
Run the inference examples as usual, for example:
```
./build/bin/main -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
```
*Notes:*
@ -529,89 +423,6 @@ For detailed instructions on how to use Conan, please refer to the [Conan docume
- Inference only
## Another example
Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
in about half a minute on a MacBook M1 Pro, using `medium.en` model:
<details>
<summary>Expand to see the result</summary>
```text
$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51864
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 1024
whisper_model_load: n_audio_head = 16
whisper_model_load: n_audio_layer = 24
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 1024
whisper_model_load: n_text_head = 16
whisper_model_load: n_text_layer = 24
whisper_model_load: n_mels = 80
whisper_model_load: f16 = 1
whisper_model_load: type = 4
whisper_model_load: mem required = 1720.00 MB (+ 43.00 MB per decoder)
whisper_model_load: kv self size = 42.00 MB
whisper_model_load: kv cross size = 140.62 MB
whisper_model_load: adding 1607 extra tokens
whisper_model_load: model ctx = 1462.35 MB
whisper_model_load: model size = 1462.12 MB
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
[00:00:00.000 --> 00:00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
[00:00:08.000 --> 00:00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
[00:00:17.000 --> 00:00:23.000] A short time later, debris was seen falling from the skies above Texas.
[00:00:23.000 --> 00:00:29.000] The Columbia's lost. There are no survivors.
[00:00:29.000 --> 00:00:32.000] On board was a crew of seven.
[00:00:32.000 --> 00:00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
[00:00:39.000 --> 00:00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
[00:00:48.000 --> 00:00:52.000] a colonel in the Israeli Air Force.
[00:00:52.000 --> 00:00:58.000] These men and women assumed great risk in the service to all humanity.
[00:00:58.000 --> 00:01:03.000] In an age when space flight has come to seem almost routine,
[00:01:03.000 --> 00:01:07.000] it is easy to overlook the dangers of travel by rocket
[00:01:07.000 --> 00:01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
[00:01:12.000 --> 00:01:18.000] These astronauts knew the dangers, and they faced them willingly,
[00:01:18.000 --> 00:01:23.000] knowing they had a high and noble purpose in life.
[00:01:23.000 --> 00:01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
[00:01:31.000 --> 00:01:36.000] All Americans today are thinking as well of the families of these men and women
[00:01:36.000 --> 00:01:40.000] who have been given this sudden shock and grief.
[00:01:40.000 --> 00:01:45.000] You're not alone. Our entire nation grieves with you,
[00:01:45.000 --> 00:01:52.000] and those you love will always have the respect and gratitude of this country.
[00:01:52.000 --> 00:01:56.000] The cause in which they died will continue.
[00:01:56.000 --> 00:02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
[00:02:04.000 --> 00:02:11.000] and the longing to understand. Our journey into space will go on.
[00:02:11.000 --> 00:02:16.000] In the skies today, we saw destruction and tragedy.
[00:02:16.000 --> 00:02:22.000] Yet farther than we can see, there is comfort and hope.
[00:02:22.000 --> 00:02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
[00:02:29.000 --> 00:02:35.000] who created all these. He who brings out the starry hosts one by one
[00:02:35.000 --> 00:02:39.000] and calls them each by name."
[00:02:39.000 --> 00:02:46.000] Because of His great power and mighty strength, not one of them is missing.
[00:02:46.000 --> 00:02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
[00:02:55.000 --> 00:03:01.000] The crew of the shuttle Columbia did not return safely to earth,
[00:03:01.000 --> 00:03:05.000] yet we can pray that all are safely home.
[00:03:05.000 --> 00:03:13.000] May God bless the grieving families, and may God continue to bless America.
[00:03:13.000 --> 00:03:19.000] [Silence]
whisper_print_timings: fallbacks = 1 p / 0 h
whisper_print_timings: load time = 569.03 ms
whisper_print_timings: mel time = 146.85 ms
whisper_print_timings: sample time = 238.66 ms / 553 runs ( 0.43 ms per run)
whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per run)
whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
whisper_print_timings: total time = 32733.52 ms
```
</details>
## Real-time audio input example
This is a naive example of performing real-time inference on audio from your microphone.
@ -619,8 +430,9 @@ The [stream](examples/stream) tool samples the audio every half a second and run
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
```bash
make stream
./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
cmake -B build -DWHISPER_SDL2=ON
cmake --build build --config Release
./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
```
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
@ -631,7 +443,7 @@ Adding the `--print-colors` argument will print the transcribed text using an ex
to highlight words with high or low confidence:
```bash
./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
```
<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
@ -641,7 +453,7 @@ to highlight words with high or low confidence:
For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
```text
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
whisper_model_load: loading model from './models/ggml-base.en.bin'
...
@ -665,7 +477,7 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
```text
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
whisper_model_load: loading model from './models/ggml-base.en.bin'
...
@ -712,7 +524,7 @@ Sample usage:
./models/download-ggml-model.sh small.en-tdrz
# run as usual, adding the "-tdrz" command-line argument
./main -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
...
main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
...
@ -729,14 +541,14 @@ main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 pr
## Karaoke-style movie generation (experimental)
The [main](examples/main) example provides support for output of karaoke-style movies, where the
The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
This requires to have `ffmpeg` installed.
Here are a few _"typical"_ examples:
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
source ./samples/jfk.wav.wts
ffplay ./samples/jfk.wav.mp4
```
@ -746,7 +558,7 @@ https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b
---
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
source ./samples/mm0.wav.wts
ffplay ./samples/mm0.wav.mp4
```
@ -756,7 +568,7 @@ https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-9
---
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
source ./samples/gb0.wav.wts
ffplay ./samples/gb0.wav.mp4
```
@ -781,7 +593,7 @@ https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8
## Benchmarks
In order to have an objective comparison of the performance of the inference across different system configurations,
use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
took to execute it. The results are summarized in the following Github issue:
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
@ -844,13 +656,12 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
| Example | Web | Description |
| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [whisper-server](examples/server) | | HTTP transcription server with OAI-like API |
| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
@ -858,7 +669,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)

View File

@ -9,22 +9,23 @@ import (
// ContextForSignal returns a context object which is cancelled when a signal
// is received. It returns nil if no signal parameter is provided
func ContextForSignal(signals ...os.Signal) context.Context {
if len(signals) == 0 {
return nil
}
if len(signals) == 0 {
return nil
}
ch := make(chan os.Signal)
ctx, cancel := context.WithCancel(context.Background())
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
ctx, cancel := context.WithCancel(context.Background())
// Send message on channel when signal received
signal.Notify(ch, signals...)
// Send message on channel when signal received
signal.Notify(ch, signals...)
// When any signal received, call cancel
go func() {
<-ch
cancel()
}()
// When any signal is received, call cancel
go func() {
<-ch
cancel()
}()
// Return success
return ctx
// Return success
return ctx
}

View File

@ -9,6 +9,7 @@ import (
"net/url"
"os"
"path/filepath"
"strings"
"syscall"
"time"
)
@ -17,14 +18,27 @@ import (
// CONSTANTS
const (
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
srcExt = ".bin" // Filename extension
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
srcExt = ".bin" // Filename extension
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
)
var (
// The models which will be downloaded, if no model is specified as an argument
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
modelNames = []string{
"tiny", "tiny-q5_1", "tiny-q8_0",
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
"base", "base-q5_1", "base-q8_0",
"base.en", "base.en-q5_1", "base.en-q8_0",
"small", "small-q5_1", "small-q8_0",
"small.en", "small.en-q5_1", "small.en-q8_0",
"medium", "medium-q5_0", "medium-q8_0",
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
"large-v1",
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
"large-v3", "large-v3-q5_0",
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
}
)
var (
@ -44,7 +58,25 @@ var (
func main() {
flag.Usage = func() {
name := filepath.Base(flag.CommandLine.Name())
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
fmt.Fprintf(flag.CommandLine.Output(), `
Usage: %s [options] [<model>...]
Options:
-out string Specify the output folder where models will be saved.
Default: Current working directory.
-timeout duration Set the maximum duration for downloading a model.
Example: 10m, 1h (default: 30m0s).
-quiet Suppress all output except errors.
Examples:
1. Download a specific model:
%s -out ./models tiny-q8_0
2. Download all models:
%s -out ./models
`, name, name, name)
flag.PrintDefaults()
}
flag.Parse()
@ -114,23 +146,87 @@ func GetOut() (string, error) {
// GetModels returns the list of models to download
func GetModels() []string {
if flag.NArg() == 0 {
return modelNames
} else {
return flag.Args()
fmt.Println("No model specified.")
fmt.Println("Preparing to download all models...")
// Calculate total download size
fmt.Println("Calculating total download size...")
totalSize, err := CalculateTotalDownloadSize(modelNames)
if err != nil {
fmt.Println("Error calculating download sizes:", err)
os.Exit(1)
}
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
fmt.Println("Would you like to download all models? (y/N)")
// Prompt for user input
var response string
fmt.Scanln(&response)
if response != "y" && response != "Y" {
fmt.Println("Aborting. Specify a model to download.")
os.Exit(0)
}
return modelNames // Return all models if confirmed
}
return flag.Args() // Return specific models if arguments are provided
}
func CalculateTotalDownloadSize(models []string) (int64, error) {
var totalSize int64
client := http.Client{}
for _, model := range models {
modelURL, err := URLForModel(model)
if err != nil {
return 0, err
}
// Issue a HEAD request to get the file size
req, err := http.NewRequest("HEAD", modelURL, nil)
if err != nil {
return 0, err
}
resp, err := client.Do(req)
if err != nil {
return 0, err
}
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
continue
}
size := resp.ContentLength
totalSize += size
}
return totalSize, nil
}
// URLForModel returns the URL for the given model on huggingface.co
func URLForModel(model string) (string, error) {
// Ensure "ggml-" prefix is added only once
if !strings.HasPrefix(model, "ggml-") {
model = "ggml-" + model
}
// Ensure ".bin" extension is added only once
if filepath.Ext(model) != srcExt {
model += srcExt
}
// Parse the base URL
url, err := url.Parse(srcUrl)
if err != nil {
return "", err
} else {
url.Path = filepath.Join(url.Path, model)
}
// Ensure no trailing slash in the base URL
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
return url.String(), nil
}

View File

@ -67,5 +67,5 @@ copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32
## License
The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.

View File

@ -181,11 +181,11 @@ public class WhisperFullParams extends Structure {
}
/** Flag to suppress non-speech tokens. */
public CBool suppress_non_speech_tokens;
public CBool suppress_nst;
/** Flag to suppress non-speech tokens. */
public void suppressNonSpeechTokens(boolean enable) {
suppress_non_speech_tokens = enable ? CBool.TRUE : CBool.FALSE;
suppress_nst = enable ? CBool.TRUE : CBool.FALSE;
}
/** Initial decoding temperature. */
@ -315,7 +315,7 @@ public class WhisperFullParams extends Structure {
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
"tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
"suppress_blank", "suppress_non_speech_tokens", "temperature", "max_initial_ts", "length_penalty",
"suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
"new_segment_callback", "new_segment_callback_user_data",
"progress_callback", "progress_callback_user_data",

View File

@ -1,6 +1,6 @@
{
"name": "whisper.cpp",
"version": "1.7.1",
"version": "1.7.4",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {

3
bindings/ruby/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
LICENSE
pkg/
lib/whisper.*

245
bindings/ruby/README.md Normal file
View File

@ -0,0 +1,245 @@
whispercpp
==========
![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
Ruby bindings for [whisper.cpp][], an interface of automatic speech recognition model.
Installation
------------
Install the gem and add to the application's Gemfile by executing:
$ bundle add whispercpp
If bundler is not being used to manage dependencies, install the gem by executing:
$ gem install whispercpp
Usage
-----
```ruby
require "whisper"
whisper = Whisper::Context.new("base")
params = Whisper::Params.new(
language: "en",
offset: 10_000,
duration: 60_000,
max_text_tokens: 300,
translate: true,
print_timestamps: false,
initial_prompt: "Initial prompt here."
)
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
puts whole_text
end
```
### Preparing model ###
Some models are prepared up-front:
```ruby
base_en = Whisper::Model.pre_converted_models["base.en"]
whisper = Whisper::Context.new(base_en)
```
At first time you use a model, it is downloaded automatically. After that, downloaded cached file is used. To clear cache, call `#clear_cache`:
```ruby
Whisper::Model.pre_converted_models["base"].clear_cache
```
You also can use shorthand for pre-converted models:
```ruby
whisper = Whisper::Context.new("base.en")
```
You can see the list of prepared model names by `Whisper::Model.pre_converted_models.keys`:
```ruby
puts Whisper::Model.pre_converted_models.keys
# tiny
# tiny.en
# tiny-q5_1
# tiny.en-q5_1
# tiny-q8_0
# base
# base.en
# base-q5_1
# base.en-q5_1
# base-q8_0
# :
# :
```
You can also use local model files you prepared:
```ruby
whisper = Whisper::Context.new("path/to/your/model.bin")
```
Or, you can download model files:
```ruby
whisper = Whisper::Context.new("https://example.net/uri/of/your/model.bin")
# Or
whisper = Whisper::Context.new(URI("https://example.net/uri/of/your/model.bin"))
```
See [models][] page for details.
### Preparing audio file ###
Currently, whisper.cpp accepts only 16-bit WAV files.
API
---
### Segments ###
Once `Whisper::Context#transcribe` called, you can retrieve segments by `#each_segment`:
```ruby
def format_time(time_ms)
sec, decimal_part = time_ms.divmod(1000)
min, sec = sec.divmod(60)
hour, min = min.divmod(60)
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
end
whisper
.transcribe("path/to/audio.wav", params)
.each_segment.with_index do |segment, index|
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
nth: index + 1,
st: format_time(segment.start_time),
ed: format_time(segment.end_time),
text: segment.text
}
line << " (speaker turned)" if segment.speaker_next_turn?
puts line
end
```
You can also add hook to params called on new segment:
```ruby
# Add hook before calling #transcribe
params.on_new_segment do |segment|
line = "[%{st} --> %{ed}] %{text}" % {
st: format_time(segment.start_time),
ed: format_time(segment.end_time),
text: segment.text
}
line << " (speaker turned)" if segment.speaker_next_turn?
puts line
end
whisper.transcribe("path/to/audio.wav", params)
```
### Models ###
You can see model information:
```ruby
whisper = Whisper::Context.new("base")
model = whisper.model
model.n_vocab # => 51864
model.n_audio_ctx # => 1500
model.n_audio_state # => 512
model.n_audio_head # => 8
model.n_audio_layer # => 6
model.n_text_ctx # => 448
model.n_text_state # => 512
model.n_text_head # => 8
model.n_text_layer # => 6
model.n_mels # => 80
model.ftype # => 1
model.type # => "base"
```
### Logging ###
You can set log callback:
```ruby
prefix = "[MyApp] "
log_callback = ->(level, buffer, user_data) {
case level
when Whisper::LOG_LEVEL_NONE
puts "#{user_data}none: #{buffer}"
when Whisper::LOG_LEVEL_INFO
puts "#{user_data}info: #{buffer}"
when Whisper::LOG_LEVEL_WARN
puts "#{user_data}warn: #{buffer}"
when Whisper::LOG_LEVEL_ERROR
puts "#{user_data}error: #{buffer}"
when Whisper::LOG_LEVEL_DEBUG
puts "#{user_data}debug: #{buffer}"
when Whisper::LOG_LEVEL_CONT
puts "#{user_data}same to previous: #{buffer}"
end
}
Whisper.log_set log_callback, prefix
```
Using this feature, you are also able to suppress log:
```ruby
Whisper.log_set ->(level, buffer, user_data) {
# do nothing
}, nil
Whisper::Context.new("base")
```
### Low-level API to transcribe ###
You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility.
```ruby
require "whisper"
require "wavefile"
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
whisper = Whisper::Context.new("base")
whisper
.full(Whisper::Params.new, samples)
.each_segment do |segment|
puts segment.text
end
```
The second argument `samples` may be an array, an object with `length` and `each` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
Development
-----------
% git clone https://github.com/ggerganov/whisper.cpp.git
% cd whisper.cpp/bindings/ruby
% rake test
First call of `rake test` builds an extension and downloads a model for testing. After that, you add tests in `tests` directory and modify `ext/ruby_whisper.cpp`.
If something seems wrong on build, running `rake clean` solves some cases.
License
-------
The same to [whisper.cpp][].
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models

View File

@ -1,12 +1,66 @@
require 'rake/clean'
require 'rubygems/package'
require "bundler/gem_tasks"
require "rake/testtask"
require_relative "extsources"
desc 'Build gem'
task :package do
spec_source = File.read File.join(File.dirname(__FILE__),'whispercpp.gemspec')
spec = nil
# see: http://gist.github.com/16215
Thread.new { spec = eval("#{spec_source}") }.join
spec.validate
Gem::Package.build(spec)
SOURCES = FileList[]
EXTSOURCES.each do |src|
basename = src.pathmap("%f")
dest = basename == "LICENSE" ? basename : src.pathmap("%{../..,ext}p")
dir = dest.pathmap("%d")
file src
directory dir
file dest => [src, dir] do |t|
cp t.source, t.name
end
SOURCES.include dest
end
CLEAN.include SOURCES
CLEAN.include FileList["ext/**/*.o", "ext/**/*.metal", "ext/**/*.tmp", "ext/whisper.{so,bundle,dll}"]
SRC = FileList["ext/*.{c,cpp,h}"]
task build: SOURCES
directory "pkg"
CLOBBER.include "pkg"
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
SO_FILE = File.join("ext", LIB_NAME)
LIB_FILE = File.join("lib", LIB_NAME)
file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
chdir "ext" do
ruby "extconf.rb"
end
end
file SO_FILE => "ext/Makefile" do |t|
chdir "ext" do
sh "make"
end
end
CLEAN.include SO_FILE
directory "lib"
file LIB_FILE => [SO_FILE, "lib"] do |t|
copy t.source, t.name
end
CLEAN.include LIB_FILE
Rake::TestTask.new do |t|
t.test_files = FileList["tests/test_*.rb"]
end
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
chdir "tests/jfk_reader" do
ruby "extconf.rb"
sh "make"
end
end
CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
task test: [LIB_FILE, TEST_MEMORY_VIEW]

View File

@ -1,9 +1,11 @@
Makefile
ggml.c
ggml.h
ggml-alloc.c
ggml-alloc.h
whisper.so
whisper.bundle
whisper.cpp
whisper.h
dr_wav.h
whisper.dll
scripts/get-flags.mk
*.o
/*/**/*.c
/*/**/*.cpp
/*/**/*.h
/*/**/*.m
/*/**/*.metal

9
bindings/ruby/ext/cpu.mk Normal file
View File

@ -0,0 +1,9 @@
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
ggml/src/ggml-cpu/ggml-cpu.cpp \
ggml/include/ggml-backend.h \
ggml/include/ggml.h \
ggml/include/ggml-alloc.h \
ggml/src/ggml-backend-impl.h \
ggml/include/ggml-cpu.h \
ggml/src/ggml-impl.h
$(CXX) $(CXXFLAGS) -c $< -o $@

View File

@ -1,24 +1,10 @@
require 'mkmf'
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.cpp')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-common.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
# need to use c++ compiler flags
$CXXFLAGS << ' -std=c++11'
$CXXFLAGS << ' -std=c++17'
$LDFLAGS << ' -lstdc++'
# Set to true when building binary gems
if enable_config('static-stdlib', false)
$LDFLAGS << ' -static-libgcc -static-libstdc++'
@ -29,4 +15,194 @@ if enable_config('march-tune-native', false)
$CXXFLAGS << ' -march=native -mtune=native'
end
if ENV['WHISPER_METAL']
$GGML_METAL ||= true
$DEPRECATE_WARNING ||= true
end
$UNAME_S = `uname -s`.chomp
$UNAME_P = `uname -p`.chomp
$UNAME_M = `uname -m`.chomp
if $UNAME_S == 'Darwin'
unless ENV['GGML_NO_METAL']
$GGML_METAL ||= true
end
$GGML_NO_OPENMP ||= true
end
if $GGML_METAL
$GGML_METAL_EMBED_LIBRARY = true
end
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
$MK_CFLAGS = '-std=c11 -fPIC'
$MK_CXXFLAGS = '-std=c++17 -fPIC'
$MK_NVCCFLAGS = '-std=c++17'
$MK_LDFLAGS = ''
$OBJ_GGML = []
$OBJ_WHISPER = []
$OBJ_COMMON = []
$OBJ_SDL = []
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
if $UNAME_S == 'Linux'
$MK_CPPFLAGS << ' -D_GNU_SOURCE'
end
if $UNAME_S == 'Darwin'
$MK_CPPFLAGS << ' -D_DARWIN_C_SOURCE'
end
if ENV['WHISPER_DEBUG']
$MK_CFLAGS << ' -O0 -g'
$MK_CXXFLAGS << ' -O0 -g'
$MK_LDFLAGS << ' -g'
$MK_NVCCFLAGS << ' -O0 -g'
else
$MK_CPPFLAGS << ' -DNDEBUG'
$MK_CFLAGS << ' -O3'
$MK_CXXFLAGS << ' -O3'
$MK_NVCCFLAGS << ' -O3'
end
$WARN_FLAGS =
' -Wall' <<
' -Wextra' <<
' -Wpedantic' <<
' -Wcast-qual' <<
' -Wno-unused-function'
$MK_CFLAGS <<
$WARN_FLAGS <<
' -Wshadow' <<
' -Wstrict-prototypes' <<
' -Wpointer-arith' <<
' -Wmissing-prototypes' <<
' -Werror=implicit-int' <<
' -Werror=implicit-function-declaration'
$MK_CXXFLAGS <<
$WARN_FLAGS <<
' -Wmissing-declarations' <<
' -Wmissing-noreturn'
unless `#{cc_command} #{$LDFLAGS} -Wl,-v 2>&1`.chomp.include? 'dyld-1015.7'
$MK_CPPFLAGS << ' -DHAVE_BUGGY_APPLE_LINKER'
end
if %w[Linux Darwin FreeBSD NetBSD OpenBSD Haiku].include? $UNAME_S
$MK_CFLAGS << ' -pthread'
$MK_CXXFLAGS << ' -pthread'
end
unless $_WIN32
$DSO_EXT = '.so'
else
$DSO_EXT = '.dll'
end
unless ENV['RISCV']
if %w[x86_64 i686 amd64].include? $UNAME_M
$HOST_CXXFLAGS ||= ''
$MK_CFLAGS << ' -march=native -mtune=native'
$HOST_CXXFLAGS << ' -march=native -mtune=native'
end
else
$MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
$MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
end
unless ENV['GGML_NO_ACCELERATE']
if $UNAME_S == 'Darwin'
$MK_CPPFLAGS << ' -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE'
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
$MK_LDFLAGS << ' -framework Accelerate'
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
end
end
if ENV['GGML_OPENBLAS']
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas)`.chomp}"
$MK_LDFLAGS << " #{`pkg-config --libs openblas`}"
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
end
if ENV['GGML_OPENBLAS64']
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64)`.chomp}"
$MK_LDFLAGS << " #{`pkg-config --libs openblas64`}"
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
end
if $GGML_METAL
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
$OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal.o'
if ENV['GGML_METAL_NDEBUG']
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
end
if $GGML_METAL_EMBED_LIBRARY
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
$OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal-embed.o'
end
end
$OBJ_GGML <<
'ggml/src/ggml.o' <<
'ggml/src/ggml-alloc.o' <<
'ggml/src/ggml-backend.o' <<
'ggml/src/ggml-backend-reg.o' <<
'ggml/src/ggml-opt.o' <<
'ggml/src/ggml-quants.o' <<
'ggml/src/ggml-threading.o' <<
'ggml/src/ggml-cpu/ggml-cpu.o' <<
'ggml/src/ggml-cpu/ggml-cpu-cpp.o' <<
'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
$OBJ_WHISPER <<
'src/whisper.o' <<
'examples/common.o' <<
'examples/common-whisper.o'
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
$objs <<
"ruby_whisper.o" <<
"ruby_whisper_context.o" <<
"ruby_whisper_transcribe.o" <<
"ruby_whisper_params.o" <<
"ruby_whisper_error.o" <<
"ruby_whisper_segment.o" <<
"ruby_whisper_model.o"
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
$BASE_CXXFLAGS = "#{$MK_CXXFLAGS} #{$CXXFLAGS}"
$CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
create_makefile('whisper')
File.open 'Makefile', 'a' do |file|
file.puts 'include scripts/get-flags.mk'
file.puts 'include cpu.mk'
if $GGML_METAL
file.puts 'include metal.mk'
if $GGML_METAL_EMBED_LIBRARY
file.puts 'include metal-embed.mk'
end
end
end

View File

@ -1,141 +0,0 @@
#pragma once
// ggml-backend internal header
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// Backend buffer
//
// buffer type
typedef void * ggml_backend_buffer_type_context_t;
struct ggml_backend_buffer_type_i {
const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft);
ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size
size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
// check if tensor data is in host memory
// should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
bool (*GGML_CALL is_host) (ggml_backend_buffer_type_t buft);
};
struct ggml_backend_buffer_type {
struct ggml_backend_buffer_type_i iface;
ggml_backend_buffer_type_context_t context;
};
// buffer
typedef void * ggml_backend_buffer_context_t;
struct ggml_backend_buffer_i {
const char * (*GGML_CALL get_name) (ggml_backend_buffer_t buffer);
void (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
void * (*GGML_CALL get_base) (ggml_backend_buffer_t buffer);
void (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
void (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
bool (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
void (*GGML_CALL clear) (ggml_backend_buffer_t buffer, uint8_t value);
void (*GGML_CALL reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
};
struct ggml_backend_buffer {
struct ggml_backend_buffer_i iface;
ggml_backend_buffer_type_t buft;
ggml_backend_buffer_context_t context;
size_t size;
enum ggml_backend_buffer_usage usage;
};
GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
ggml_backend_buffer_type_t buft,
struct ggml_backend_buffer_i iface,
ggml_backend_buffer_context_t context,
size_t size);
// do not use directly, use ggml_backend_tensor_copy instead
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
// buffer that contains a collection of buffers
GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
//
// Backend
//
typedef void * ggml_backend_context_t;
struct ggml_backend_i {
const char * (*GGML_CALL get_name)(ggml_backend_t backend);
void (*GGML_CALL free)(ggml_backend_t backend);
// buffer allocation
ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
// (optional) asynchronous tensor data access
void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
// (optional) complete all pending operations
void (*GGML_CALL synchronize)(ggml_backend_t backend);
// compute graph with a plan (not used currently)
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
// compute graph with a plan
enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
// compute graph without a plan (async)
enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
// check if the backend supports an operation
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
// these should be expensive operations with large batch sizes that may benefit from running on this backend
// even if the weight has to be copied from the CPU temporarily
bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
// (optional) event synchronization
ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
void (*GGML_CALL event_free) (ggml_backend_event_t event);
void (*GGML_CALL event_record) (ggml_backend_event_t event);
void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
};
struct ggml_backend {
ggml_guid_t guid;
struct ggml_backend_i iface;
ggml_backend_context_t context;
};
struct ggml_backend_event {
ggml_backend_t backend;
void * context;
};
//
// Backend registry
//
typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data);
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,233 +0,0 @@
#pragma once
#include "ggml.h"
#include "ggml-alloc.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend_event * ggml_backend_event_t;
typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t;
//
// Backend buffer
//
// buffer type
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
// buffer
enum ggml_backend_buffer_usage {
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
};
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
//
// Backend
//
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
GGML_API void ggml_backend_free(ggml_backend_t backend);
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
// tensor copy between different backends
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
// asynchronous copy
// the copy is performed after all the currently queued operations in backend_src
// backend_dst will wait for the copy to complete before performing other operations
// automatic fallback to sync copy if async is not supported
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
// events
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
//
// CPU backend
//
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
// Create a backend buffer from an existing pointer
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
#ifdef GGML_USE_CPU_HBM
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
#endif
//
// Backend registry
//
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
GGML_API size_t ggml_backend_reg_get_count(void);
GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
GGML_API const char * ggml_backend_reg_get_name(size_t i);
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
//
// Backend scheduler
//
// The backend scheduler allows for multiple backends to be used together
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
// The backends are selected based on:
// - the backend that supports the operation
// - the location of the pre-allocated tensors (e.g. the weights)
/*
Example usage:
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
// preferrably to run on the same backend as the buffer
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
// initialize buffers from a max size graph (optional)
reserve_graph = build_graph(sched, max_batch_size);
// manually assign nodes to a backend (optional, should not be needed in most cases)
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
ggml_backend_sched_reserve(sched, reserve_graph);
// compute
graph = build_graph(sched);
ggml_backend_sched_graph_compute(sched, graph);
// if there are graph inputs:
ggml_backend_sched_reset(sched);
ggml_backend_sched_alloc_graph(sched, graph);
ggml_backend_tensor_set(input_tensor, ...);
ggml_backend_sched_graph_compute(sched, graph);
}
*/
struct ggml_backend_sched;
typedef struct ggml_backend_sched * ggml_backend_sched_t;
// when ask == true, the scheduler wants to know if the user wants to observe this node
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
//
// when ask == false, the scheduler is passing the node tensor to the user for observation
// if the user returns false, the scheduler will cancel the graph compute
//
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
// Initialize a backend scheduler
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
// Initialize backend buffers from a measure graph
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
// Get the number of splits of the last graph
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
// Allocate and compute graph on the backend scheduler
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
// Reset all assignments and allocators - must be called before changing the node backends
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
// Set a callback to be called for each resulting node during graph compute
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
//
// Utils
//
struct ggml_backend_graph_copy {
ggml_backend_buffer_t buffer;
struct ggml_context * ctx_allocated;
struct ggml_context * ctx_unallocated;
struct ggml_cgraph * graph;
};
// Copy a graph to a different backend
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
// Compare the output of two backends
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
// Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,43 +0,0 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef GGML_USE_HIPBLAS
#define GGML_CUDA_NAME "ROCm"
#define GGML_CUBLAS_NAME "hipBLAS"
#else
#define GGML_CUDA_NAME "CUDA"
#define GGML_CUBLAS_NAME "cuBLAS"
#endif
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_CUDA_MAX_DEVICES 16
// backend API
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
// device buffer
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
// split tensor buffer that splits matrices by rows across multiple devices
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
#ifdef __cplusplus
}
#endif

View File

@ -1,272 +0,0 @@
#pragma once
#include "ggml.h"
// GGML internal header
#include <assert.h>
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
#include <stddef.h>
#include <stdbool.h>
#include <string.h> // memcpy
#include <math.h> // fabsf
#ifdef __cplusplus
extern "C" {
#endif
// static_assert should be a #define, but if it's not,
// fall back to the _Static_assert C11 keyword.
// if C99 - static_assert is noop
// ref: https://stackoverflow.com/a/53923785/4039976
#ifndef __cplusplus
#ifndef static_assert
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
#define static_assert(cond, msg) _Static_assert(cond, msg)
#else
#define static_assert(cond, msg) struct global_scope_noop_trick
#endif
#endif
#endif
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
#ifndef __FMA__
#define __FMA__
#endif
#ifndef __F16C__
#define __F16C__
#endif
#endif
// __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
#if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
#ifndef __SSE3__
#define __SSE3__
#endif
#ifndef __SSSE3__
#define __SSSE3__
#endif
#endif
// 16-bit float
// on Arm, we use __fp16
// on x86, we use uint16_t
#if defined(__ARM_NEON) && !defined(_MSC_VER)
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
//
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
//
#include <arm_neon.h>
typedef __fp16 ggml_fp16_internal_t;
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
ggml_fp16_internal_t tmp;
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
return (float)tmp;
}
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
ggml_fp16_t res;
ggml_fp16_internal_t tmp = f;
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
return res;
}
#else
typedef uint16_t ggml_fp16_internal_t;
#ifdef __wasm_simd128__
#include <wasm_simd128.h>
#else
#ifdef __POWER9_VECTOR__
#include <altivec.h>
#undef bool
#define bool _Bool
#else
#if defined(_MSC_VER) || defined(__MINGW32__)
#include <intrin.h>
#else
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
#if !defined(__riscv)
#include <immintrin.h>
#endif
#endif
#endif
#endif
#endif
#ifdef __riscv_v_intrinsic
#include <riscv_vector.h>
#endif
#ifdef __F16C__
#ifdef _MSC_VER
#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
#else
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
#endif
#elif defined(__POWER9_VECTOR__)
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
/* the inline asm below is about 12% faster than the lookup method */
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
register float f;
register double d;
__asm__(
"mtfprd %0,%2\n"
"xscvhpdp %0,%0\n"
"frsp %1,%0\n" :
/* temp */ "=d"(d),
/* out */ "=f"(f):
/* in */ "r"(h));
return f;
}
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
register double d;
register ggml_fp16_t r;
__asm__( /* xscvdphp can work on double or single precision */
"xscvdphp %0,%2\n"
"mffprd %1,%0\n" :
/* temp */ "=d"(d),
/* out */ "=r"(r):
/* in */ "f"(f));
return r;
}
#else
// FP16 <-> FP32
// ref: https://github.com/Maratyszcza/FP16
static inline float fp32_from_bits(uint32_t w) {
union {
uint32_t as_bits;
float as_value;
} fp32;
fp32.as_bits = w;
return fp32.as_value;
}
static inline uint32_t fp32_to_bits(float f) {
union {
float as_value;
uint32_t as_bits;
} fp32;
fp32.as_value = f;
return fp32.as_bits;
}
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
const uint32_t w = (uint32_t) h << 16;
const uint32_t sign = w & UINT32_C(0x80000000);
const uint32_t two_w = w + w;
const uint32_t exp_offset = UINT32_C(0xE0) << 23;
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float exp_scale = 0x1.0p-112f;
#else
const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
#endif
const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
const uint32_t magic_mask = UINT32_C(126) << 23;
const float magic_bias = 0.5f;
const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
const uint32_t result = sign |
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
return fp32_from_bits(result);
}
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float scale_to_inf = 0x1.0p+112f;
const float scale_to_zero = 0x1.0p-110f;
#else
const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
#endif
float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
const uint32_t w = fp32_to_bits(f);
const uint32_t shl1_w = w + w;
const uint32_t sign = w & UINT32_C(0x80000000);
uint32_t bias = shl1_w & UINT32_C(0xFF000000);
if (bias < UINT32_C(0x71000000)) {
bias = UINT32_C(0x71000000);
}
base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
const uint32_t bits = fp32_to_bits(base);
const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
const uint32_t nonsign = exp_bits + mantissa_bits;
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
}
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#endif // __F16C__
#endif // __ARM_NEON
// precomputed f32 table for f16 (256 KB)
// defined in ggml.c, initialized in ggml_init()
extern float ggml_table_f32_f16[1 << 16];
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
// This is also true for POWER9.
#if !defined(GGML_FP16_TO_FP32)
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
uint16_t s;
memcpy(&s, &f, sizeof(uint16_t));
return ggml_table_f32_f16[s];
}
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
#endif
#if !defined(GGML_FP32_TO_FP16)
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif
#define GGML_HASHTABLE_FULL ((size_t)-1)
#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
struct ggml_hash_set ggml_hash_set_new(size_t size);
bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
// return index, asserts if table is full
size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
#ifdef __cplusplus
}
#endif

View File

@ -1,46 +0,0 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
struct ggml_vk_device {
int index;
int type; // same as VkPhysicalDeviceType
size_t heapSize;
const char * name;
const char * vendor;
int subgroupSize;
uint64_t bufferAlignment;
uint64_t maxAlloc;
};
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
bool ggml_vk_has_vulkan(void);
bool ggml_vk_has_device(void);
struct ggml_vk_device ggml_vk_current_device(void);
//
// backend API
//
// forward declaration
typedef struct ggml_backend * ggml_backend_t;
GGML_API ggml_backend_t ggml_backend_kompute_init(int device);
GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
#ifdef __cplusplus
}
#endif

View File

@ -1,66 +0,0 @@
// An interface allowing to compute ggml_cgraph with Metal
//
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
//
// How it works?
//
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
//
// You only need to make sure that all memory buffers that you used during the graph creation
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
// used during the graph evaluation to determine the arguments of the compute kernels.
//
// Synchronization between device and host memory (for example for input and output tensors)
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
//
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#include <stddef.h>
#include <stdbool.h>
// max memory buffers that can be mapped to the device
#define GGML_METAL_MAX_BUFFERS 64
struct ggml_tensor;
struct ggml_cgraph;
#ifdef __cplusplus
extern "C" {
#endif
//
// backend API
// user-code should use only these functions
//
GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
GGML_API ggml_backend_t ggml_backend_metal_init(void);
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
// helper to check if the device supports a specific family
// ideally, the user code should be doing these checks
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
#ifdef __cplusplus
}
#endif

View File

@ -1,36 +0,0 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
GGML_API void ggml_cl_init(void);
GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
// GGML_API void * ggml_cl_host_malloc(size_t size);
// GGML_API void ggml_cl_host_free(void * ptr);
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
// backend API
// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,133 +0,0 @@
#pragma once
#define GGML_COMMON_DECL_C
#include "ggml-common.h"
#include "ggml.h"
// GGML internal header
#ifdef __cplusplus
extern "C" {
#endif
// Quantization
void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k);
void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k);
void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k);
void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k);
void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k);
void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k);
void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k);
void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k);
void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k);
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k);
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k);
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k);
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k);
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k);
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k);
void quantize_row_iq3_s_reference (const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k);
void quantize_row_iq2_s_reference (const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k);
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
// Dequantization
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq1_s (const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq1_m (const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq4_nl (const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq4_xs (const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
void dequantize_row_iq3_s (const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
// Dot product
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq1_m (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
void iq2xs_init_impl(enum ggml_type type);
void iq2xs_free_impl(enum ggml_type type);
void iq3xs_init_impl(int grid_size);
void iq3xs_free_impl(int grid_size);
#ifdef __cplusplus
}
#endif

View File

@ -1,49 +0,0 @@
//
// MIT license
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: MIT
//
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_SYCL_MAX_DEVICES 48
#define GGML_SYCL_NAME "SYCL"
// backend API
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
// devide buffer
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
// split tensor buffer that splits matrices by rows across multiple devices
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
// TODO: these are temporary
// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
// SYCL doesn't support registering host memory, keep here for reference
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
#ifdef __cplusplus
}
#endif

View File

@ -1,29 +0,0 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_VK_NAME "Vulkan"
#define GGML_VK_MAX_DEVICES 16
GGML_API void ggml_vk_instance_init(void);
// backend API
GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,17 @@
ggml/src/ggml-metal/ggml-metal-embed.o: \
ggml/src/ggml-metal/ggml-metal.metal \
ggml/src/ggml-metal/ggml-metal-impl.h \
ggml/src/ggml-common.h
@echo "Embedding Metal library"
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
@rmdir ${TEMP_ASSEMBLY}

View File

@ -0,0 +1,6 @@
ggml/src/ggml-metal/ggml-metal.o: \
ggml/src/ggml-metal/ggml-metal.m \
ggml/src/ggml-metal/ggml-metal-impl.h \
ggml/include/ggml-metal.h \
ggml/include/ggml.h
$(CC) $(CFLAGS) -c $< -o $@

View File

@ -0,0 +1,164 @@
#include <ruby.h>
#include <ruby/memory_view.h>
#include "ruby_whisper.h"
VALUE mWhisper;
VALUE cContext;
VALUE cParams;
VALUE eError;
VALUE cSegment;
VALUE cModel;
ID id_to_s;
ID id_call;
ID id___method__;
ID id_to_enum;
ID id_length;
ID id_next;
ID id_new;
ID id_to_path;
ID id_URI;
ID id_pre_converted_models;
static bool is_log_callback_finalized = false;
// High level API
extern VALUE ruby_whisper_segment_allocate(VALUE klass);
extern void init_ruby_whisper_context(VALUE *mWhisper);
extern void init_ruby_whisper_params(VALUE *mWhisper);
extern void init_ruby_whisper_error(VALUE *mWhisper);
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
extern void init_ruby_whisper_model(VALUE *mWhisper);
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
/*
* call-seq:
* lang_max_id -> Integer
*/
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
return INT2NUM(whisper_lang_max_id());
}
/*
* call-seq:
* lang_id(lang_name) -> Integer
*/
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
const char * lang_str = StringValueCStr(lang);
const int id = whisper_lang_id(lang_str);
if (-1 == id) {
rb_raise(rb_eArgError, "language not found: %s", lang_str);
}
return INT2NUM(id);
}
/*
* call-seq:
* lang_str(lang_id) -> String
*/
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
const int lang_id = NUM2INT(id);
const char * str = whisper_lang_str(lang_id);
if (NULL == str) {
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
}
return rb_str_new2(str);
}
/*
* call-seq:
* lang_str(lang_id) -> String
*/
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
const int lang_id = NUM2INT(id);
const char * str_full = whisper_lang_str_full(lang_id);
if (NULL == str_full) {
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
}
return rb_str_new2(str_full);
}
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
is_log_callback_finalized = true;
return Qnil;
}
static void
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
if (is_log_callback_finalized) {
return;
}
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
VALUE udata = rb_iv_get(mWhisper, "user_data");
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
}
/*
* call-seq:
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
*/
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
VALUE old_callback = rb_iv_get(self, "log_callback");
if (!NIL_P(old_callback)) {
rb_undefine_finalizer(old_callback);
}
rb_iv_set(self, "log_callback", log_callback);
rb_iv_set(self, "user_data", user_data);
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
rb_define_finalizer(log_callback, finalize_log_callback);
whisper_log_set(ruby_whisper_log_callback, NULL);
return Qnil;
}
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
rb_gc_mark(rwm->context);
}
static VALUE ruby_whisper_model_allocate(VALUE klass) {
ruby_whisper_model *rwm;
rwm = ALLOC(ruby_whisper_model);
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
}
void Init_whisper() {
id_to_s = rb_intern("to_s");
id_call = rb_intern("call");
id___method__ = rb_intern("__method__");
id_to_enum = rb_intern("to_enum");
id_length = rb_intern("length");
id_next = rb_intern("next");
id_new = rb_intern("new");
id_to_path = rb_intern("to_path");
id_URI = rb_intern("URI");
id_pre_converted_models = rb_intern("pre_converted_models");
mWhisper = rb_define_module("Whisper");
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
init_ruby_whisper_context(&mWhisper);
init_ruby_whisper_params(&mWhisper);
init_ruby_whisper_error(&mWhisper);
init_ruby_whisper_segment(&mWhisper, &cContext);
init_ruby_whisper_model(&mWhisper);
rb_require("whisper/model/uri");
}

View File

@ -1,418 +0,0 @@
#include <ruby.h>
#include "ruby_whisper.h"
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include <cmath>
#include <fstream>
#include <cstdio>
#include <string>
#include <thread>
#include <vector>
#ifdef __cplusplus
extern "C" {
#endif
#define BOOL_PARAMS_SETTER(self, prop, value) \
ruby_whisper_params *rwp; \
Data_Get_Struct(self, ruby_whisper_params, rwp); \
if (value == Qfalse || value == Qnil) { \
rwp->params.prop = false; \
} else { \
rwp->params.prop = true; \
} \
return value; \
#define BOOL_PARAMS_GETTER(self, prop) \
ruby_whisper_params *rwp; \
Data_Get_Struct(self, ruby_whisper_params, rwp); \
if (rwp->params.prop) { \
return Qtrue; \
} else { \
return Qfalse; \
}
VALUE mWhisper;
VALUE cContext;
VALUE cParams;
static void ruby_whisper_free(ruby_whisper *rw) {
if (rw->context) {
whisper_free(rw->context);
rw->context = NULL;
}
}
static void ruby_whisper_params_free(ruby_whisper_params *rwp) {
}
void rb_whisper_mark(ruby_whisper *rw) {
// call rb_gc_mark on any ruby references in rw
}
void rb_whisper_free(ruby_whisper *rw) {
ruby_whisper_free(rw);
free(rw);
}
void rb_whisper_params_mark(ruby_whisper_params *rwp) {
}
void rb_whisper_params_free(ruby_whisper_params *rwp) {
ruby_whisper_params_free(rwp);
free(rwp);
}
static VALUE ruby_whisper_allocate(VALUE klass) {
ruby_whisper *rw;
rw = ALLOC(ruby_whisper);
rw->context = NULL;
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
}
static VALUE ruby_whisper_params_allocate(VALUE klass) {
ruby_whisper_params *rwp;
rwp = ALLOC(ruby_whisper_params);
rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp);
}
static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
ruby_whisper *rw;
VALUE whisper_model_file_path;
// TODO: we can support init from buffer here too maybe another ruby object to expose
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
Data_Get_Struct(self, ruby_whisper, rw);
if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
}
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
if (rw->context == nullptr) {
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
}
return self;
}
/*
* transcribe a single file
* can emit to a block results
*
**/
static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
ruby_whisper *rw;
ruby_whisper_params *rwp;
VALUE wave_file_path, blk, params;
rb_scan_args(argc, argv, "02&", &wave_file_path, &params, &blk);
Data_Get_Struct(self, ruby_whisper, rw);
Data_Get_Struct(params, ruby_whisper_params, rwp);
if (!rb_respond_to(wave_file_path, rb_intern("to_s"))) {
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
}
std::string fname_inp = StringValueCStr(wave_file_path);
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
// WAV input - this is directly from main.cpp example
{
drwav wav;
std::vector<uint8_t> wav_data; // used for pipe input from stdin
if (fname_inp == "-") {
{
uint8_t buf[1024];
while (true) {
const size_t n = fread(buf, 1, sizeof(buf), stdin);
if (n == 0) {
break;
}
wav_data.insert(wav_data.end(), buf, buf + n);
}
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to open WAV file from stdin\n");
return self;
}
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
return self;
}
if (wav.channels != 1 && wav.channels != 2) {
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
return self;
}
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
return self;
}
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
return self;
}
if (wav.bitsPerSample != 16) {
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
return self;
}
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
std::vector<int16_t> pcm16;
pcm16.resize(n*wav.channels);
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
drwav_uninit(&wav);
// convert to mono, float
pcmf32.resize(n);
if (wav.channels == 1) {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[i])/32768.0f;
}
} else {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
}
}
if (rwp->diarize) {
// convert to stereo, float
pcmf32s.resize(2);
pcmf32s[0].resize(n);
pcmf32s[1].resize(n);
for (uint64_t i = 0; i < n; i++) {
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
}
}
}
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
bool is_aborted = *(bool*)user_data;
return !is_aborted;
};
rwp->params.encoder_begin_callback_user_data = &is_aborted;
}
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
fprintf(stderr, "failed to process audio\n");
return self;
}
const int n_segments = whisper_full_n_segments(rw->context);
VALUE output = rb_str_new2("");
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(rw->context, i);
output = rb_str_concat(output, rb_str_new2(text));
}
VALUE idCall = rb_intern("call");
if (blk != Qnil) {
rb_funcall(blk, idCall, 1, output);
}
return self;
}
/*
* params.language = "auto" | "en", etc...
*/
static VALUE ruby_whisper_params_set_language(VALUE self, VALUE value) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
if (value == Qfalse || value == Qnil) {
rwp->params.language = "auto";
} else {
rwp->params.language = StringValueCStr(value);
}
return value;
}
static VALUE ruby_whisper_params_get_language(VALUE self) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
if (rwp->params.language) {
return rb_str_new2(rwp->params.language);
} else {
return rb_str_new2("auto");
}
}
static VALUE ruby_whisper_params_set_translate(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, translate, value)
}
static VALUE ruby_whisper_params_get_translate(VALUE self) {
BOOL_PARAMS_GETTER(self, translate)
}
static VALUE ruby_whisper_params_set_no_context(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, no_context, value)
}
static VALUE ruby_whisper_params_get_no_context(VALUE self) {
BOOL_PARAMS_GETTER(self, no_context)
}
static VALUE ruby_whisper_params_set_single_segment(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, single_segment, value)
}
static VALUE ruby_whisper_params_get_single_segment(VALUE self) {
BOOL_PARAMS_GETTER(self, single_segment)
}
static VALUE ruby_whisper_params_set_print_special(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, print_special, value)
}
static VALUE ruby_whisper_params_get_print_special(VALUE self) {
BOOL_PARAMS_GETTER(self, print_special)
}
static VALUE ruby_whisper_params_set_print_progress(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, print_progress, value)
}
static VALUE ruby_whisper_params_get_print_progress(VALUE self) {
BOOL_PARAMS_GETTER(self, print_progress)
}
static VALUE ruby_whisper_params_set_print_realtime(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, print_realtime, value)
}
static VALUE ruby_whisper_params_get_print_realtime(VALUE self) {
BOOL_PARAMS_GETTER(self, print_realtime)
}
static VALUE ruby_whisper_params_set_print_timestamps(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, print_timestamps, value)
}
static VALUE ruby_whisper_params_get_print_timestamps(VALUE self) {
BOOL_PARAMS_GETTER(self, print_timestamps)
}
static VALUE ruby_whisper_params_set_suppress_blank(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, suppress_blank, value)
}
static VALUE ruby_whisper_params_get_suppress_blank(VALUE self) {
BOOL_PARAMS_GETTER(self, suppress_blank)
}
static VALUE ruby_whisper_params_set_suppress_non_speech_tokens(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, suppress_non_speech_tokens, value)
}
static VALUE ruby_whisper_params_get_suppress_non_speech_tokens(VALUE self) {
BOOL_PARAMS_GETTER(self, suppress_non_speech_tokens)
}
static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) {
BOOL_PARAMS_GETTER(self, token_timestamps)
}
static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, token_timestamps, value)
}
static VALUE ruby_whisper_params_get_split_on_word(VALUE self) {
BOOL_PARAMS_GETTER(self, split_on_word)
}
static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, split_on_word, value)
}
static VALUE ruby_whisper_params_get_diarize(VALUE self) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
if (rwp->diarize) {
return Qtrue;
} else {
return Qfalse;
}
}
static VALUE ruby_whisper_params_set_diarize(VALUE self, VALUE value) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
if (value == Qfalse || value == Qnil) {
rwp->diarize = false;
} else {
rwp->diarize = true;
} \
return value;
}
static VALUE ruby_whisper_params_get_offset(VALUE self) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
return INT2NUM(rwp->params.offset_ms);
}
static VALUE ruby_whisper_params_set_offset(VALUE self, VALUE value) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
rwp->params.offset_ms = NUM2INT(value);
return value;
}
static VALUE ruby_whisper_params_get_duration(VALUE self) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
return INT2NUM(rwp->params.duration_ms);
}
static VALUE ruby_whisper_params_set_duration(VALUE self, VALUE value) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
rwp->params.duration_ms = NUM2INT(value);
return value;
}
static VALUE ruby_whisper_params_get_max_text_tokens(VALUE self) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
return INT2NUM(rwp->params.n_max_text_ctx);
}
static VALUE ruby_whisper_params_set_max_text_tokens(VALUE self, VALUE value) {
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper_params, rwp);
rwp->params.n_max_text_ctx = NUM2INT(value);
return value;
}
void Init_whisper() {
mWhisper = rb_define_module("Whisper");
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
rb_define_alloc_func(cContext, ruby_whisper_allocate);
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
rb_define_alloc_func(cParams, ruby_whisper_params_allocate);
rb_define_method(cParams, "language=", ruby_whisper_params_set_language, 1);
rb_define_method(cParams, "language", ruby_whisper_params_get_language, 0);
rb_define_method(cParams, "translate=", ruby_whisper_params_set_translate, 1);
rb_define_method(cParams, "translate", ruby_whisper_params_get_translate, 0);
rb_define_method(cParams, "no_context=", ruby_whisper_params_set_no_context, 1);
rb_define_method(cParams, "no_context", ruby_whisper_params_get_no_context, 0);
rb_define_method(cParams, "single_segment=", ruby_whisper_params_set_single_segment, 1);
rb_define_method(cParams, "single_segment", ruby_whisper_params_get_single_segment, 0);
rb_define_method(cParams, "print_special", ruby_whisper_params_get_print_special, 0);
rb_define_method(cParams, "print_special=", ruby_whisper_params_set_print_special, 1);
rb_define_method(cParams, "print_progress", ruby_whisper_params_get_print_progress, 0);
rb_define_method(cParams, "print_progress=", ruby_whisper_params_set_print_progress, 1);
rb_define_method(cParams, "print_realtime", ruby_whisper_params_get_print_realtime, 0);
rb_define_method(cParams, "print_realtime=", ruby_whisper_params_set_print_realtime, 1);
rb_define_method(cParams, "print_timestamps", ruby_whisper_params_get_print_timestamps, 0);
rb_define_method(cParams, "print_timestamps=", ruby_whisper_params_set_print_timestamps, 1);
rb_define_method(cParams, "suppress_blank", ruby_whisper_params_get_suppress_blank, 0);
rb_define_method(cParams, "suppress_blank=", ruby_whisper_params_set_suppress_blank, 1);
rb_define_method(cParams, "suppress_non_speech_tokens", ruby_whisper_params_get_suppress_non_speech_tokens, 0);
rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1);
rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0);
rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1);
rb_define_method(cParams, "offset", ruby_whisper_params_get_offset, 0);
rb_define_method(cParams, "offset=", ruby_whisper_params_set_offset, 1);
rb_define_method(cParams, "duration", ruby_whisper_params_get_duration, 0);
rb_define_method(cParams, "duration=", ruby_whisper_params_set_duration, 1);
rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0);
rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1);
}
#ifdef __cplusplus
}
#endif

View File

@ -1,8 +1,15 @@
#ifndef __RUBY_WHISPER_H
#define __RUBY_WHISPER_H
#ifndef RUBY_WHISPER_H
#define RUBY_WHISPER_H
#include "whisper.h"
typedef struct {
VALUE *context;
VALUE user_data;
VALUE callback;
VALUE callbacks;
} ruby_whisper_callback_container;
typedef struct {
struct whisper_context *context;
} ruby_whisper;
@ -10,6 +17,18 @@ typedef struct {
typedef struct {
struct whisper_full_params params;
bool diarize;
ruby_whisper_callback_container *new_segment_callback_container;
ruby_whisper_callback_container *progress_callback_container;
ruby_whisper_callback_container *abort_callback_container;
} ruby_whisper_params;
typedef struct {
VALUE context;
int index;
} ruby_whisper_segment;
typedef struct {
VALUE context;
} ruby_whisper_model;
#endif

View File

@ -0,0 +1,613 @@
#include <ruby.h>
#include <ruby/memory_view.h>
#include "ruby_whisper.h"
extern ID id_to_s;
extern ID id___method__;
extern ID id_to_enum;
extern ID id_length;
extern ID id_next;
extern ID id_new;
extern ID id_to_path;
extern ID id_URI;
extern ID id_pre_converted_models;
extern VALUE cContext;
extern VALUE eError;
extern VALUE cModel;
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
extern VALUE rb_whisper_model_initialize(VALUE context);
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
static void
ruby_whisper_free(ruby_whisper *rw)
{
if (rw->context) {
whisper_free(rw->context);
rw->context = NULL;
}
}
void
rb_whisper_mark(ruby_whisper *rw)
{
// call rb_gc_mark on any ruby references in rw
}
void
rb_whisper_free(ruby_whisper *rw)
{
ruby_whisper_free(rw);
free(rw);
}
static VALUE
ruby_whisper_allocate(VALUE klass)
{
ruby_whisper *rw;
rw = ALLOC(ruby_whisper);
rw->context = NULL;
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
}
/*
* call-seq:
* new("base.en") -> Whisper::Context
* new("path/to/model.bin") -> Whisper::Context
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
*/
static VALUE
ruby_whisper_initialize(int argc, VALUE *argv, VALUE self)
{
ruby_whisper *rw;
VALUE whisper_model_file_path;
// TODO: we can support init from buffer here too maybe another ruby object to expose
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
Data_Get_Struct(self, ruby_whisper, rw);
VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, whisper_model_file_path);
if (!NIL_P(pre_converted_model)) {
whisper_model_file_path = pre_converted_model;
}
if (TYPE(whisper_model_file_path) == T_STRING) {
const char * whisper_model_file_path_str = StringValueCStr(whisper_model_file_path);
if (strncmp("http://", whisper_model_file_path_str, 7) == 0 || strncmp("https://", whisper_model_file_path_str, 8) == 0) {
VALUE uri_class = rb_const_get(cModel, id_URI);
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
}
}
if (rb_obj_is_kind_of(whisper_model_file_path, rb_path2class("URI::HTTP"))) {
VALUE uri_class = rb_const_get(cModel, id_URI);
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
}
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
}
if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
}
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
if (rw->context == NULL) {
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
}
return self;
}
/*
* call-seq:
* model_n_vocab -> Integer
*/
VALUE ruby_whisper_model_n_vocab(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_vocab(rw->context));
}
/*
* call-seq:
* model_n_audio_ctx -> Integer
*/
VALUE ruby_whisper_model_n_audio_ctx(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
}
/*
* call-seq:
* model_n_audio_state -> Integer
*/
VALUE ruby_whisper_model_n_audio_state(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_state(rw->context));
}
/*
* call-seq:
* model_n_audio_head -> Integer
*/
VALUE ruby_whisper_model_n_audio_head(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_head(rw->context));
}
/*
* call-seq:
* model_n_audio_layer -> Integer
*/
VALUE ruby_whisper_model_n_audio_layer(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_layer(rw->context));
}
/*
* call-seq:
* model_n_text_ctx -> Integer
*/
VALUE ruby_whisper_model_n_text_ctx(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_ctx(rw->context));
}
/*
* call-seq:
* model_n_text_state -> Integer
*/
VALUE ruby_whisper_model_n_text_state(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_state(rw->context));
}
/*
* call-seq:
* model_n_text_head -> Integer
*/
VALUE ruby_whisper_model_n_text_head(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_head(rw->context));
}
/*
* call-seq:
* model_n_text_layer -> Integer
*/
VALUE ruby_whisper_model_n_text_layer(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_layer(rw->context));
}
/*
* call-seq:
* model_n_mels -> Integer
*/
VALUE ruby_whisper_model_n_mels(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_n_mels(rw->context));
}
/*
* call-seq:
* model_ftype -> Integer
*/
VALUE ruby_whisper_model_ftype(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_model_ftype(rw->context));
}
/*
* call-seq:
* model_type -> String
*/
VALUE ruby_whisper_model_type(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return rb_str_new2(whisper_model_type_readable(rw->context));
}
/*
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
* Not thread safe for same context
* Uses the specified decoding strategy to obtain the text.
*
* call-seq:
* full(params, samples, n_samples) -> nil
* full(params, samples) -> nil
*
* The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
*/
VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
{
if (argc < 2 || argc > 3) {
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
}
ruby_whisper *rw;
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper, rw);
VALUE params = argv[0];
Data_Get_Struct(params, ruby_whisper_params, rwp);
VALUE samples = argv[1];
int n_samples;
rb_memory_view_t view;
const bool memory_view_available_p = rb_memory_view_available_p(samples);
if (argc == 3) {
n_samples = NUM2INT(argv[2]);
if (TYPE(samples) == T_ARRAY) {
if (RARRAY_LEN(samples) < n_samples) {
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
}
}
// Should check when samples.respond_to?(:length)?
} else {
if (TYPE(samples) == T_ARRAY) {
n_samples = RARRAY_LEN(samples);
} else if (memory_view_available_p) {
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
view.obj = Qnil;
rb_raise(rb_eArgError, "unable to get a memory view");
}
n_samples = view.byte_size / view.item_size;
} else if (rb_respond_to(samples, id_length)) {
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
} else {
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
}
}
float * c_samples = (float *)malloc(n_samples * sizeof(float));
if (memory_view_available_p) {
c_samples = (float *)view.data;
} else {
if (TYPE(samples) == T_ARRAY) {
for (int i = 0; i < n_samples; i++) {
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
}
} else {
// TODO: use rb_block_call
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
for (int i = 0; i < n_samples; i++) {
// TODO: check if iter is exhausted and raise ArgumentError appropriately
VALUE sample = rb_funcall(iter, id_next, 0);
c_samples[i] = RFLOAT_VALUE(sample);
}
}
}
register_callbacks(rwp, &self);
const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
if (0 == result) {
return self;
} else {
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
}
}
/*
* Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
* Result is stored in the default state of the context
* Not thread safe if executed in parallel on the same context.
* It seems this approach can offer some speedup in some cases.
* However, the transcription accuracy can be worse at the beginning and end of each chunk.
*
* call-seq:
* full_parallel(params, samples) -> nil
* full_parallel(params, samples, n_samples) -> nil
* full_parallel(params, samples, n_samples, n_processors) -> nil
* full_parallel(params, samples, nil, n_processors) -> nil
*/
static VALUE
ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
{
if (argc < 2 || argc > 4) {
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
}
ruby_whisper *rw;
ruby_whisper_params *rwp;
Data_Get_Struct(self, ruby_whisper, rw);
VALUE params = argv[0];
Data_Get_Struct(params, ruby_whisper_params, rwp);
VALUE samples = argv[1];
int n_samples;
int n_processors;
rb_memory_view_t view;
const bool memory_view_available_p = rb_memory_view_available_p(samples);
switch (argc) {
case 2:
n_processors = 1;
break;
case 3:
n_processors = 1;
break;
case 4:
n_processors = NUM2INT(argv[3]);
break;
}
if (argc >= 3 && !NIL_P(argv[2])) {
n_samples = NUM2INT(argv[2]);
if (TYPE(samples) == T_ARRAY) {
if (RARRAY_LEN(samples) < n_samples) {
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
}
}
// Should check when samples.respond_to?(:length)?
} else if (memory_view_available_p) {
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
view.obj = Qnil;
rb_raise(rb_eArgError, "unable to get a memory view");
}
n_samples = view.byte_size / view.item_size;
} else {
if (TYPE(samples) == T_ARRAY) {
n_samples = RARRAY_LEN(samples);
} else if (rb_respond_to(samples, id_length)) {
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
} else {
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
}
}
float * c_samples = (float *)malloc(n_samples * sizeof(float));
if (memory_view_available_p) {
c_samples = (float *)view.data;
} else {
if (TYPE(samples) == T_ARRAY) {
for (int i = 0; i < n_samples; i++) {
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
}
} else {
// FIXME: use rb_block_call
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
for (int i = 0; i < n_samples; i++) {
// TODO: check if iter is exhausted and raise ArgumentError
VALUE sample = rb_funcall(iter, id_next, 0);
c_samples[i] = RFLOAT_VALUE(sample);
}
}
}
register_callbacks(rwp, &self);
const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
if (0 == result) {
return self;
} else {
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
}
}
/*
* Number of segments.
*
* call-seq:
* full_n_segments -> Integer
*/
static VALUE
ruby_whisper_full_n_segments(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_full_n_segments(rw->context));
}
/*
* Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
*
* call-seq:
* full_lang_id -> Integer
*/
static VALUE
ruby_whisper_full_lang_id(VALUE self)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
return INT2NUM(whisper_full_lang_id(rw->context));
}
static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment)
{
const int c_i_segment = NUM2INT(i_segment);
if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) {
rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment);
}
return c_i_segment;
}
/*
* Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
*
* full_get_segment_t0(3) # => 1668 (16680 ms)
*
* call-seq:
* full_get_segment_t0(segment_index) -> Integer
*/
static VALUE
ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
return INT2NUM(t0);
}
/*
* End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
*
* full_get_segment_t1(3) # => 1668 (16680 ms)
*
* call-seq:
* full_get_segment_t1(segment_index) -> Integer
*/
static VALUE
ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
return INT2NUM(t1);
}
/*
* Whether the next segment indexed by +segment_index+ is predicated as a speaker turn.
*
* full_get_segment_speacker_turn_next(3) # => true
*
* call-seq:
* full_get_segment_speacker_turn_next(segment_index) -> bool
*/
static VALUE
ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment);
return speaker_turn_next ? Qtrue : Qfalse;
}
/*
* Text of a segment indexed by +segment_index+.
*
* full_get_segment_text(3) # => "ask not what your country can do for you, ..."
*
* call-seq:
* full_get_segment_text(segment_index) -> String
*/
static VALUE
ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const char * text = whisper_full_get_segment_text(rw->context, c_i_segment);
return rb_str_new2(text);
}
/*
* call-seq:
* full_get_segment_no_speech_prob(segment_index) -> Float
*/
static VALUE
ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
{
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const float no_speech_prob = whisper_full_get_segment_no_speech_prob(rw->context, c_i_segment);
return DBL2NUM(no_speech_prob);
}
// High level API
static VALUE
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
{
return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
}
/*
* Yields each Whisper::Segment:
*
* whisper.transcribe("path/to/audio.wav", params)
* whisper.each_segment do |segment|
* puts segment.text
* end
*
* Returns an Enumerator if no block given:
*
* whisper.transcribe("path/to/audio.wav", params)
* enum = whisper.each_segment
* enum.to_a # => [#<Whisper::Segment>, ...]
*
* call-seq:
* each_segment {|segment| ... }
* each_segment -> Enumerator
*/
static VALUE
ruby_whisper_each_segment(VALUE self)
{
if (!rb_block_given_p()) {
const VALUE method_name = rb_funcall(self, id___method__, 0);
return rb_funcall(self, id_to_enum, 1, method_name);
}
ruby_whisper *rw;
Data_Get_Struct(self, ruby_whisper, rw);
const int n_segments = whisper_full_n_segments(rw->context);
for (int i = 0; i < n_segments; ++i) {
rb_yield(rb_whisper_segment_initialize(self, i));
}
return self;
}
/*
* call-seq:
* model -> Whisper::Model
*/
static VALUE
ruby_whisper_get_model(VALUE self)
{
return rb_whisper_model_initialize(self);
}
void
init_ruby_whisper_context(VALUE *mWhisper)
{
cContext = rb_define_class_under(*mWhisper, "Context", rb_cObject);
rb_define_alloc_func(cContext, ruby_whisper_allocate);
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1);
rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1);
rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1);
rb_define_method(cContext, "full_get_segment_no_speech_prob", ruby_whisper_full_get_segment_no_speech_prob, 1);
rb_define_method(cContext, "full", ruby_whisper_full, -1);
rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1);
// High leve
rb_define_method(cContext, "full_get_segment", ruby_whisper_full_get_segment, 1);
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
}

View File

@ -0,0 +1,52 @@
#include <ruby.h>
extern VALUE eError;
VALUE ruby_whisper_error_initialize(VALUE self, VALUE code)
{
const int c_code = NUM2INT(code);
const char *raw_message;
switch (c_code) {
case -2:
raw_message = "failed to compute log mel spectrogram";
break;
case -3:
raw_message = "failed to auto-detect language";
break;
case -4:
raw_message = "too many decoders requested";
break;
case -5:
raw_message = "audio_ctx is larger than the maximum allowed";
break;
case -6:
raw_message = "failed to encode";
break;
case -7:
raw_message = "whisper_kv_cache_init() failed for self-attention cache";
break;
case -8:
raw_message = "failed to decode";
break;
case -9:
raw_message = "failed to decode";
break;
default:
raw_message = "unknown error";
break;
}
const VALUE message = rb_str_new2(raw_message);
rb_call_super(1, &message);
rb_iv_set(self, "@code", code);
return self;
}
void
init_ruby_whisper_error(VALUE *mWhisper)
{
eError = rb_define_class_under(*mWhisper, "Error", rb_eStandardError);
rb_define_attr(eError, "code", true, false);
rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1);
}

View File

@ -0,0 +1,210 @@
#include <ruby.h>
#include "ruby_whisper.h"
extern VALUE cModel;
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
rb_gc_mark(rwm->context);
}
static VALUE ruby_whisper_model_allocate(VALUE klass) {
ruby_whisper_model *rwm;
rwm = ALLOC(ruby_whisper_model);
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
}
VALUE rb_whisper_model_initialize(VALUE context) {
ruby_whisper_model *rwm;
const VALUE model = ruby_whisper_model_allocate(cModel);
Data_Get_Struct(model, ruby_whisper_model, rwm);
rwm->context = context;
return model;
};
/*
* call-seq:
* n_vocab -> Integer
*/
static VALUE
ruby_whisper_model_n_vocab(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_vocab(rw->context));
}
/*
* call-seq:
* n_audio_ctx -> Integer
*/
static VALUE
ruby_whisper_model_n_audio_ctx(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
}
/*
* call-seq:
* n_audio_state -> Integer
*/
static VALUE
ruby_whisper_model_n_audio_state(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_state(rw->context));
}
/*
* call-seq:
* n_audio_head -> Integer
*/
static VALUE
ruby_whisper_model_n_audio_head(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_head(rw->context));
}
/*
* call-seq:
* n_audio_layer -> Integer
*/
static VALUE
ruby_whisper_model_n_audio_layer(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_audio_layer(rw->context));
}
/*
* call-seq:
* n_text_ctx -> Integer
*/
static VALUE
ruby_whisper_model_n_text_ctx(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_ctx(rw->context));
}
/*
* call-seq:
* n_text_state -> Integer
*/
static VALUE
ruby_whisper_model_n_text_state(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_state(rw->context));
}
/*
* call-seq:
* n_text_head -> Integer
*/
static VALUE
ruby_whisper_model_n_text_head(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_head(rw->context));
}
/*
* call-seq:
* n_text_layer -> Integer
*/
static VALUE
ruby_whisper_model_n_text_layer(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_text_layer(rw->context));
}
/*
* call-seq:
* n_mels -> Integer
*/
static VALUE
ruby_whisper_model_n_mels(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_n_mels(rw->context));
}
/*
* call-seq:
* ftype -> Integer
*/
static VALUE
ruby_whisper_model_ftype(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return INT2NUM(whisper_model_ftype(rw->context));
}
/*
* call-seq:
* type -> String
*/
static VALUE
ruby_whisper_model_type(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
return rb_str_new2(whisper_model_type_readable(rw->context));
}
void
init_ruby_whisper_model(VALUE *mWhisper)
{
cModel = rb_define_class_under(*mWhisper, "Model", rb_cObject);
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
rb_define_method(cModel, "n_vocab", ruby_whisper_model_n_vocab, 0);
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
rb_define_method(cModel, "n_audio_state", ruby_whisper_model_n_audio_state, 0);
rb_define_method(cModel, "n_audio_head", ruby_whisper_model_n_audio_head, 0);
rb_define_method(cModel, "n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
rb_define_method(cModel, "n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
rb_define_method(cModel, "n_text_state", ruby_whisper_model_n_text_state, 0);
rb_define_method(cModel, "n_text_head", ruby_whisper_model_n_text_head, 0);
rb_define_method(cModel, "n_text_layer", ruby_whisper_model_n_text_layer, 0);
rb_define_method(cModel, "n_mels", ruby_whisper_model_n_mels, 0);
rb_define_method(cModel, "ftype", ruby_whisper_model_ftype, 0);
rb_define_method(cModel, "type", ruby_whisper_model_type, 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,123 @@
#include <ruby.h>
#include "ruby_whisper.h"
extern VALUE cSegment;
static void
rb_whisper_segment_mark(ruby_whisper_segment *rws)
{
rb_gc_mark(rws->context);
}
VALUE
ruby_whisper_segment_allocate(VALUE klass)
{
ruby_whisper_segment *rws;
rws = ALLOC(ruby_whisper_segment);
return Data_Wrap_Struct(klass, rb_whisper_segment_mark, RUBY_DEFAULT_FREE, rws);
}
VALUE
rb_whisper_segment_initialize(VALUE context, int index)
{
ruby_whisper_segment *rws;
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
Data_Get_Struct(segment, ruby_whisper_segment, rws);
rws->context = context;
rws->index = index;
return segment;
};
/*
* Start time in milliseconds.
*
* call-seq:
* start_time -> Integer
*/
static VALUE
ruby_whisper_segment_get_start_time(VALUE self)
{
ruby_whisper_segment *rws;
Data_Get_Struct(self, ruby_whisper_segment, rws);
ruby_whisper *rw;
Data_Get_Struct(rws->context, ruby_whisper, rw);
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
return INT2NUM(t0 * 10);
}
/*
* End time in milliseconds.
*
* call-seq:
* end_time -> Integer
*/
static VALUE
ruby_whisper_segment_get_end_time(VALUE self)
{
ruby_whisper_segment *rws;
Data_Get_Struct(self, ruby_whisper_segment, rws);
ruby_whisper *rw;
Data_Get_Struct(rws->context, ruby_whisper, rw);
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
return INT2NUM(t1 * 10);
}
/*
* Whether the next segment is predicted as a speaker turn.
*
* call-seq:
* speaker_turn_next? -> bool
*/
static VALUE
ruby_whisper_segment_get_speaker_turn_next(VALUE self)
{
ruby_whisper_segment *rws;
Data_Get_Struct(self, ruby_whisper_segment, rws);
ruby_whisper *rw;
Data_Get_Struct(rws->context, ruby_whisper, rw);
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
}
/*
* call-seq:
* text -> String
*/
static VALUE
ruby_whisper_segment_get_text(VALUE self)
{
ruby_whisper_segment *rws;
Data_Get_Struct(self, ruby_whisper_segment, rws);
ruby_whisper *rw;
Data_Get_Struct(rws->context, ruby_whisper, rw);
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
return rb_str_new2(text);
}
/*
* call-seq:
* no_speech_prob -> Float
*/
static VALUE
ruby_whisper_segment_get_no_speech_prob(VALUE self)
{
ruby_whisper_segment *rws;
Data_Get_Struct(self, ruby_whisper_segment, rws);
ruby_whisper *rw;
Data_Get_Struct(rws->context, ruby_whisper, rw);
return DBL2NUM(whisper_full_get_segment_no_speech_prob(rw->context, rws->index));
}
void
init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cContext)
{
cSegment = rb_define_class_under(*mWhisper, "Segment", rb_cObject);
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
rb_define_method(cSegment, "no_speech_prob", ruby_whisper_segment_get_no_speech_prob, 0);
}

View File

@ -0,0 +1,83 @@
#include <ruby.h>
#include "ruby_whisper.h"
#include "common-whisper.h"
#include <string>
#include <vector>
#ifdef __cplusplus
extern "C" {
#endif
extern ID id_to_s;
extern ID id_call;
extern void
register_callbacks(ruby_whisper_params * rwp, VALUE * self);
/*
* transcribe a single file
* can emit to a block results
*
* params = Whisper::Params.new
* params.duration = 60_000
* whisper.transcribe "path/to/audio.wav", params do |text|
* puts text
* end
*
* call-seq:
* transcribe(path_to_audio, params) {|text| ...}
**/
VALUE
ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
ruby_whisper *rw;
ruby_whisper_params *rwp;
VALUE wave_file_path, blk, params;
rb_scan_args(argc, argv, "02&", &wave_file_path, &params, &blk);
Data_Get_Struct(self, ruby_whisper, rw);
Data_Get_Struct(params, ruby_whisper_params, rwp);
if (!rb_respond_to(wave_file_path, id_to_s)) {
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
}
std::string fname_inp = StringValueCStr(wave_file_path);
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
return self;
}
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
bool is_aborted = *(bool*)user_data;
return !is_aborted;
};
rwp->params.encoder_begin_callback_user_data = &is_aborted;
}
register_callbacks(rwp, &self);
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
fprintf(stderr, "failed to process audio\n");
return self;
}
const int n_segments = whisper_full_n_segments(rw->context);
VALUE output = rb_str_new2("");
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(rw->context, i);
output = rb_str_concat(output, rb_str_new2(text));
}
VALUE idCall = id_call;
if (blk != Qnil) {
rb_funcall(blk, idCall, 1, output);
}
return self;
}
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,6 @@
require "yaml"
sources = `git ls-files -z ../..`.split("\x0")
paths = YAML.load_file("../../.github/workflows/bindings-ruby.yml")[true]["push"]["paths"]
paths.delete "bindings/ruby/**"
EXTSOURCES = (Dir.glob(paths, base: "../..").collect {|path| "../../#{path}"} << "../../LICENSE") & sources

View File

@ -0,0 +1,170 @@
require "uri"
require "net/http"
require "time"
require "pathname"
require "io/console/size"
module Whisper
class Model
class URI
def initialize(uri)
@uri = URI(uri)
end
def to_path
cache
cache_path.to_path
end
def clear_cache
path = cache_path
path.delete if path.exist?
end
private
def cache_path
base_cache_dir/@uri.host/@uri.path[1..]
end
def base_cache_dir
base = case RUBY_PLATFORM
when /mswin|mingw/
ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home)/"AppData/Local"
when /darwin/
Pathname(Dir.home)/"Library/Caches"
else
ENV.key?("XDG_CACHE_HOME") ? ENV["XDG_CACHE_HOME"] : Pathname(Dir.home)/".cache"
end
base/"whisper.cpp"
end
def cache
path = cache_path
headers = {}
headers["if-modified-since"] = path.mtime.httpdate if path.exist?
request @uri, headers
path
end
def request(uri, headers)
Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
request = Net::HTTP::Get.new(uri, headers)
http.request request do |response|
case response
when Net::HTTPNotModified
# noop
when Net::HTTPOK
download response
when Net::HTTPRedirection
request URI(response["location"]), headers
else
return if headers.key?("if-modified-since") # Use cache file
raise "#{response.code} #{response.message}\n#{response.body}"
end
end
end
rescue => err
if cache_path.exist?
warn err
# Use cache file
else
raise
end
end
def download(response)
path = cache_path
path.dirname.mkpath unless path.dirname.exist?
downloading_path = Pathname("#{path}.downloading")
size = response.content_length
downloading_path.open "wb" do |file|
downloaded = 0
response.read_body do |chunk|
file << chunk
downloaded += chunk.bytesize
show_progress downloaded, size
end
$stderr.puts
end
downloading_path.rename path
end
def show_progress(current, size)
progress_rate_available = size && $stderr.tty?
unless @prev
@prev = Time.now
$stderr.puts "Downloading #{@uri} to #{cache_path}"
end
now = Time.now
if progress_rate_available
return if now - @prev < 1 && current < size
progress_width = 20
progress = current.to_f / size
arrow_length = progress * progress_width
arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
line = "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
padding = ' ' * ($stderr.winsize[1] - line.size)
$stderr.print "\r#{line}#{padding}"
else
return if now - @prev < 1
$stderr.print "."
end
@prev = now
end
def format_bytesize(bytesize)
return "0.0 B" if bytesize.zero?
units = %w[B KiB MiB GiB TiB]
exp = (Math.log(bytesize) / Math.log(1024)).to_i
format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
end
end
@pre_converted_models = %w[
tiny
tiny.en
tiny-q5_1
tiny.en-q5_1
tiny-q8_0
base
base.en
base-q5_1
base.en-q5_1
base-q8_0
small
small.en
small.en-tdrz
small-q5_1
small.en-q5_1
small-q8_0
medium
medium.en
medium-q5_0
medium.en-q5_0
medium-q8_0
large-v1
large-v2
large-v2-q5_0
large-v2-q8_0
large-v3
large-v3-q5_0
large-v3-turbo
large-v3-turbo-q5_0
large-v3-turbo-q8_0
].each_with_object({}) {|name, models|
models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
}
class << self
attr_reader :pre_converted_models
end
end
end

View File

@ -0,0 +1,189 @@
module Whisper
interface _Samples
def length: () -> Integer
def each: { (Float) -> void } -> void
end
type log_callback = ^(Integer level, String message, Object user_data) -> void
type new_segment_callback = ^(Whisper::Context, void, Integer n_new, Object user_data) -> void
type progress_callback = ^(Whisper::Context, void, Integer progress, Object user_data) -> void
type abort_callback = ^(Whisper::Context, void, Object user_data) -> boolish
LOG_LEVEL_NONE: Integer
LOG_LEVEL_INFO: Integer
LOG_LEVEL_WARN: Integer
LOG_LEVEL_ERROR: Integer
LOG_LEVEL_DEBUG: Integer
LOG_LEVEL_CONT: Integer
def self.lang_max_id: () -> Integer
def self.lang_id: (string name) -> Integer
def self.lang_str: (Integer id) -> String
def self.lang_str_full: (Integer id) -> String
def self.log_set: (log_callback, Object? user_data) -> log_callback
class Context
def self.new: (string | _ToPath | ::URI::HTTP) -> instance
def transcribe: (string, Params) -> self
| (string, Params) { (String) -> void } -> self
def model_n_vocab: () -> Integer
def model_n_audio_ctx: () -> Integer
def model_n_audio_state: () -> Integer
def model_n_text_head: () -> Integer
def model_n_text_layer: () -> Integer
def model_n_mels: () -> Integer
def model_ftype: () -> Integer
def model_type: () -> String
def each_segment: { (Segment) -> void } -> void
| () -> Enumerator[Segment]
def model: () -> Model
def full_get_segment: (Integer nth) -> Segment
def full_n_segments: () -> Integer
def full_lang_id: () -> Integer
def full_get_segment_t0: (Integer) -> Integer
def full_get_segment_t1: (Integer) -> Integer
def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
def full_get_segment_text: (Integer) -> String
def full_get_segment_no_speech_prob: (Integer) -> Float
def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
| (Params, _Samples, ?Integer n_samples) -> self
def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
| (Params, _Samples, ?Integer n_samples) -> self
| (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
end
class Params
def self.new: (
?language: string,
?translate: boolish,
?no_context: boolish,
?single_segment: boolish,
?print_special: boolish,
?print_progress: boolish,
?print_realtime: boolish,
?print_timestamps: boolish,
?suppress_blank: boolish,
?suppress_nst: boolish,
?token_timestamps: boolish,
?split_on_word: boolish,
?initial_prompt: string | nil,
?diarize: boolish,
?offset: Integer,
?duration: Integer,
?max_text_tokens: Integer,
?temperature: Float,
?max_initial_ts: Float,
?length_penalty: Float,
?temperature_inc: Float,
?entropy_thold: Float,
?logprob_thold: Float,
?no_speech_thold: Float,
?new_segment_callback: new_segment_callback,
?new_segment_callback_user_data: Object,
?progress_callback: progress_callback,
?progress_callback_user_data: Object,
?abort_callback: abort_callback,
?abort_callback_user_data: Object
) -> instance
def language=: (String) -> String # TODO: Enumerate lang names
def language: () -> String
def translate=: (boolish) -> boolish
def translate: () -> (true | false)
def no_context=: (boolish) -> boolish
def no_context: () -> (true | false)
def single_segment=: (boolish) -> boolish
def single_segment: () -> (true | false)
def print_special=: (boolish) -> boolish
def print_special: () -> (true | false)
def print_progress=: (boolish) -> boolish
def print_progress: () -> (true | false)
def print_realtime=: (boolish) -> boolish
def print_realtime: () -> (true | false)
def print_timestamps=: (boolish) -> boolish
def print_timestamps: () -> (true | false)
def suppress_blank=: (boolish) -> boolish
def suppress_blank: () -> (true | false)
def suppress_nst=: (boolish) -> boolish
def suppress_nst: () -> (true | false)
def token_timestamps=: (boolish) -> boolish
def token_timestamps: () -> (true | false)
def split_on_word=: (boolish) -> boolish
def split_on_word: () -> (true | false)
def initial_prompt=: (_ToS) -> _ToS
def initial_prompt: () -> (String | nil)
def diarize=: (boolish) -> boolish
def diarize: () -> (true | false)
def offset=: (Integer) -> Integer
def offset: () -> Integer
def duration=: (Integer) -> Integer
def duration: () -> Integer
def max_text_tokens=: (Integer) -> Integer
def max_text_tokens: () -> Integer
def temperature=: (Float) -> Float
def temperature: () -> Float
def max_initial_ts=: (Float) -> Float
def max_initial_ts: () -> Float
def length_penalty=: (Float) -> Float
def length_penalty: () -> Float
def temperature_inc=: (Float) -> Float
def temperature_inc: () -> Float
def entropy_thold=: (Float) -> Float
def entropy_thold: () -> Float
def logprob_thold=: (Float) -> Float
def logprob_thold: () -> Float
def no_speech_thold=: (Float) -> Float
def no_speech_thold: () -> Float
def new_segment_callback=: (new_segment_callback) -> new_segment_callback
def new_segment_callback: () -> (new_segment_callback | nil)
def new_segment_callback_user_data=: (Object) -> Object
def new_segment_callback_user_data: () -> Object
def progress_callback=: (progress_callback) -> progress_callback
def progress_callback: () -> (progress_callback | nil)
def progress_callback_user_data=: (Object) -> Object
def progress_callback_user_data: () -> Object
def abort_callback=: (abort_callback) -> abort_callback
def abort_callback: () -> (abort_callback | nil)
def abort_callback_user_data=: (Object) -> Object
def abort_callback_user_data: () -> Object
def on_new_segment: { (Segment) -> void } -> void
def on_progress: { (Integer progress) -> void } -> void
def abort_on: { (Object user_data) -> boolish } -> void
end
class Model
def self.pre_converted_models: () -> Hash[String, Model::URI]
def self.new: () -> instance
def n_vocab: () -> Integer
def n_audio_ctx: () -> Integer
def n_audio_state: () -> Integer
def n_audio_head: () -> Integer
def n_audio_layer: () -> Integer
def n_text_ctx: () -> Integer
def n_text_state: () -> Integer
def n_text_head: () -> Integer
def n_text_layer: () -> Integer
def n_mels: () -> Integer
def ftype: () -> Integer
def type: () -> String
class URI
def self.new: (string | ::URI::HTTP) -> self
def to_path: -> String
def clear_cache: -> void
end
end
class Segment
def start_time: () -> Integer
def end_time: () -> Integer
def speaker_next_turn?: () -> (true | false)
def text: () -> String
def no_speech_prob: () -> Float
end
class Error < StandardError
attr_reader code: Integer
def self.new: (Integer code) -> instance
end
end

View File

@ -0,0 +1,24 @@
require "test/unit"
require "whisper"
require_relative "jfk_reader/jfk_reader"
class TestBase < Test::Unit::TestCase
AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")
class << self
attr_reader :whisper
def startup
@whisper = Whisper::Context.new("base.en")
params = Whisper::Params.new
params.print_timestamps = false
@whisper.transcribe(TestBase::AUDIO, params)
end
end
private
def whisper
self.class.whisper
end
end

View File

@ -0,0 +1,5 @@
Makefile
jfk_reader.o
jfk_reader.so
jfk_reader.bundle
jfk_reader.dll

View File

@ -0,0 +1,3 @@
require "mkmf"
create_makefile("jfk_reader")

View File

@ -0,0 +1,68 @@
#include <ruby.h>
#include <ruby/memory_view.h>
#include <ruby/encoding.h>
static VALUE
jfk_reader_initialize(VALUE self, VALUE audio_path)
{
rb_iv_set(self, "audio_path", audio_path);
return Qnil;
}
static bool
jfk_reader_get_memory_view(const VALUE obj, rb_memory_view_t *view, int flags)
{
VALUE audio_path = rb_iv_get(obj, "audio_path");
const char *audio_path_str = StringValueCStr(audio_path);
const int n_samples = 176000;
float *data = (float *)malloc(n_samples * sizeof(float));
short *samples = (short *)malloc(n_samples * sizeof(short));
FILE *file = fopen(audio_path_str, "rb");
fseek(file, 78, SEEK_SET);
fread(samples, sizeof(short), n_samples, file);
fclose(file);
for (int i = 0; i < n_samples; i++) {
data[i] = samples[i]/32768.0;
}
view->obj = obj;
view->data = (void *)data;
view->byte_size = sizeof(float) * n_samples;
view->readonly = true;
view->format = "f";
view->item_size = sizeof(float);
view->item_desc.components = NULL;
view->item_desc.length = 0;
view->ndim = 1;
view->shape = NULL;
view->sub_offsets = NULL;
view->private_data = NULL;
return true;
}
static bool
jfk_reader_release_memory_view(const VALUE obj, rb_memory_view_t *view)
{
return true;
}
static bool
jfk_reader_memory_view_available_p(const VALUE obj)
{
return true;
}
static const rb_memory_view_entry_t jfk_reader_view_entry = {
jfk_reader_get_memory_view,
jfk_reader_release_memory_view,
jfk_reader_memory_view_available_p
};
void Init_jfk_reader(void)
{
VALUE cJFKReader = rb_define_class("JFKReader", rb_cObject);
rb_memory_view_register(cJFKReader, &jfk_reader_view_entry);
rb_define_method(cJFKReader, "initialize", jfk_reader_initialize, 1);
}

View File

@ -0,0 +1,160 @@
require_relative "helper"
class TestCallback < TestBase
def setup
GC.start
@params = Whisper::Params.new
@whisper = Whisper::Context.new("base.en")
@audio = File.join(AUDIO)
end
def test_new_segment_callback
@params.new_segment_callback = ->(context, state, n_new, user_data) {
assert_kind_of Integer, n_new
assert n_new > 0
assert_same @whisper, context
n_segments = context.full_n_segments
n_new.times do |i|
i_segment = n_segments - 1 + i
start_time = context.full_get_segment_t0(i_segment) * 10
end_time = context.full_get_segment_t1(i_segment) * 10
text = context.full_get_segment_text(i_segment)
assert_kind_of Integer, start_time
assert start_time >= 0
assert_kind_of Integer, end_time
assert end_time > 0
assert_match /ask not what your country can do for you, ask what you can do for your country/, text if i_segment == 0
end
}
@whisper.transcribe(@audio, @params)
end
def test_new_segment_callback_closure
search_word = "what"
@params.new_segment_callback = ->(context, state, n_new, user_data) {
n_segments = context.full_n_segments
n_new.times do |i|
i_segment = n_segments - 1 + i
text = context.full_get_segment_text(i_segment)
if text.include?(search_word)
t0 = context.full_get_segment_t0(i_segment)
t1 = context.full_get_segment_t1(i_segment)
raise "search word '#{search_word}' found at between #{t0} and #{t1}"
end
end
}
assert_raise RuntimeError do
@whisper.transcribe(@audio, @params)
end
end
def test_new_segment_callback_user_data
udata = Object.new
@params.new_segment_callback_user_data = udata
@params.new_segment_callback = ->(context, state, n_new, user_data) {
assert_same udata, user_data
}
@whisper.transcribe(@audio, @params)
end
def test_new_segment_callback_user_data_gc
@params.new_segment_callback_user_data = "My user data"
@params.new_segment_callback = ->(context, state, n_new, user_data) {
assert_equal "My user data", user_data
}
GC.start
assert_same @whisper, @whisper.transcribe(@audio, @params)
end
def test_progress_callback
first = nil
last = nil
@params.progress_callback = ->(context, state, progress, user_data) {
assert_kind_of Integer, progress
assert 0 <= progress && progress <= 100
assert_same @whisper, context
first = progress if first.nil?
last = progress
}
@whisper.transcribe(@audio, @params)
assert_equal 0, first
assert_equal 100, last
end
def test_progress_callback_user_data
udata = Object.new
@params.progress_callback_user_data = udata
@params.progress_callback = ->(context, state, n_new, user_data) {
assert_same udata, user_data
}
@whisper.transcribe(@audio, @params)
end
def test_on_progress
first = nil
last = nil
@params.on_progress do |progress|
assert_kind_of Integer, progress
assert 0 <= progress && progress <= 100
first = progress if first.nil?
last = progress
end
@whisper.transcribe(@audio, @params)
assert_equal 0, first
assert_equal 100, last
end
def test_abort_callback
i = 0
@params.abort_callback = ->(user_data) {
assert_nil user_data
i += 1
return false
}
@whisper.transcribe(@audio, @params)
assert i > 0
end
def test_abort_callback_abort
i = 0
@params.abort_callback = ->(user_data) {
i += 1
return i == 3
}
@whisper.transcribe(@audio, @params)
assert_equal 3, i
end
def test_abort_callback_user_data
udata = Object.new
@params.abort_callback_user_data = udata
yielded = nil
@params.abort_callback = ->(user_data) {
yielded = user_data
}
@whisper.transcribe(@audio, @params)
assert_same udata, yielded
end
def test_abort_on
do_abort = false
aborted_from_callback = false
@params.on_new_segment do |segment|
do_abort = true if segment.text.match? /ask/
end
i = 0
@params.abort_on do
i += 1
do_abort
end
@whisper.transcribe(@audio, @params)
assert i > 0
end
end

View File

@ -0,0 +1,20 @@
require_relative "helper"
class TestError < TestBase
def test_error
error = Whisper::Error.new(-2)
assert_equal "failed to compute log mel spectrogram", error.message
assert_equal -2, error.code
end
def test_unknown_error
error = Whisper::Error.new(-20)
assert_equal "unknown error", error.message
end
def test_non_int_code
assert_raise TypeError do
error = Whisper::Error.new("non int")
end
end
end

View File

@ -0,0 +1,109 @@
require_relative "helper"
require "pathname"
class TestModel < TestBase
def test_model
whisper = Whisper::Context.new("base.en")
assert_instance_of Whisper::Model, whisper.model
end
def test_attributes
whisper = Whisper::Context.new("base.en")
model = whisper.model
assert_equal 51864, model.n_vocab
assert_equal 1500, model.n_audio_ctx
assert_equal 512, model.n_audio_state
assert_equal 8, model.n_audio_head
assert_equal 6, model.n_audio_layer
assert_equal 448, model.n_text_ctx
assert_equal 512, model.n_text_state
assert_equal 8, model.n_text_head
assert_equal 6, model.n_text_layer
assert_equal 80, model.n_mels
assert_equal 1, model.ftype
assert_equal "base", model.type
end
def test_gc
model = Whisper::Context.new("base.en").model
GC.start
assert_equal 51864, model.n_vocab
assert_equal 1500, model.n_audio_ctx
assert_equal 512, model.n_audio_state
assert_equal 8, model.n_audio_head
assert_equal 6, model.n_audio_layer
assert_equal 448, model.n_text_ctx
assert_equal 512, model.n_text_state
assert_equal 8, model.n_text_head
assert_equal 6, model.n_text_layer
assert_equal 80, model.n_mels
assert_equal 1, model.ftype
assert_equal "base", model.type
end
def test_pathname
path = Pathname(Whisper::Model.pre_converted_models["base.en"].to_path)
whisper = Whisper::Context.new(path)
model = whisper.model
assert_equal 51864, model.n_vocab
assert_equal 1500, model.n_audio_ctx
assert_equal 512, model.n_audio_state
assert_equal 8, model.n_audio_head
assert_equal 6, model.n_audio_layer
assert_equal 448, model.n_text_ctx
assert_equal 512, model.n_text_state
assert_equal 8, model.n_text_head
assert_equal 6, model.n_text_layer
assert_equal 80, model.n_mels
assert_equal 1, model.ftype
assert_equal "base", model.type
end
def test_auto_download
path = Whisper::Model.pre_converted_models["base.en"].to_path
assert_path_exist path
assert_equal 147964211, File.size(path)
end
def test_uri_string
path = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"
whisper = Whisper::Context.new(path)
model = whisper.model
assert_equal 51864, model.n_vocab
assert_equal 1500, model.n_audio_ctx
assert_equal 512, model.n_audio_state
assert_equal 8, model.n_audio_head
assert_equal 6, model.n_audio_layer
assert_equal 448, model.n_text_ctx
assert_equal 512, model.n_text_state
assert_equal 8, model.n_text_head
assert_equal 6, model.n_text_layer
assert_equal 80, model.n_mels
assert_equal 1, model.ftype
assert_equal "base", model.type
end
def test_uri
path = URI("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin")
whisper = Whisper::Context.new(path)
model = whisper.model
assert_equal 51864, model.n_vocab
assert_equal 1500, model.n_audio_ctx
assert_equal 512, model.n_audio_state
assert_equal 8, model.n_audio_head
assert_equal 6, model.n_audio_layer
assert_equal 448, model.n_text_ctx
assert_equal 512, model.n_text_state
assert_equal 8, model.n_text_head
assert_equal 6, model.n_text_layer
assert_equal 80, model.n_mels
assert_equal 1, model.ftype
assert_equal "base", model.type
end
end

View File

@ -0,0 +1,31 @@
require_relative "helper"
require 'tempfile'
require 'tmpdir'
require 'shellwords'
class TestPackage < TestBase
def test_build
Tempfile.create do |file|
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
assert file.size > 0
assert_path_exist file.to_path
end
end
sub_test_case "Building binary on installation" do
def setup
system "rake", "build", exception: true
end
def test_install
match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
filename = match_data[1]
version = match_data[2]
basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
Dir.mktmpdir do |dir|
system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{filename.shellescape}", exception: true
assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
end
end
end
end

View File

@ -0,0 +1,246 @@
require_relative "helper"
class TestParams < TestBase
PARAM_NAMES = [
:language,
:translate,
:no_context,
:single_segment,
:print_special,
:print_progress,
:print_realtime,
:print_timestamps,
:suppress_blank,
:suppress_nst,
:token_timestamps,
:split_on_word,
:initial_prompt,
:diarize,
:offset,
:duration,
:max_text_tokens,
:temperature,
:max_initial_ts,
:length_penalty,
:temperature_inc,
:entropy_thold,
:logprob_thold,
:no_speech_thold,
:new_segment_callback,
:new_segment_callback_user_data,
:progress_callback,
:progress_callback_user_data,
:abort_callback,
:abort_callback_user_data,
]
def setup
@params = Whisper::Params.new
end
def test_language
@params.language = "en"
assert_equal @params.language, "en"
@params.language = "auto"
assert_equal @params.language, "auto"
end
def test_offset
@params.offset = 10_000
assert_equal @params.offset, 10_000
@params.offset = 0
assert_equal @params.offset, 0
end
def test_duration
@params.duration = 60_000
assert_equal @params.duration, 60_000
@params.duration = 0
assert_equal @params.duration, 0
end
def test_max_text_tokens
@params.max_text_tokens = 300
assert_equal @params.max_text_tokens, 300
@params.max_text_tokens = 0
assert_equal @params.max_text_tokens, 0
end
def test_translate
@params.translate = true
assert @params.translate
@params.translate = false
assert !@params.translate
end
def test_no_context
@params.no_context = true
assert @params.no_context
@params.no_context = false
assert !@params.no_context
end
def test_single_segment
@params.single_segment = true
assert @params.single_segment
@params.single_segment = false
assert !@params.single_segment
end
def test_print_special
@params.print_special = true
assert @params.print_special
@params.print_special = false
assert !@params.print_special
end
def test_print_progress
@params.print_progress = true
assert @params.print_progress
@params.print_progress = false
assert !@params.print_progress
end
def test_print_realtime
@params.print_realtime = true
assert @params.print_realtime
@params.print_realtime = false
assert !@params.print_realtime
end
def test_print_timestamps
@params.print_timestamps = true
assert @params.print_timestamps
@params.print_timestamps = false
assert !@params.print_timestamps
end
def test_suppress_blank
@params.suppress_blank = true
assert @params.suppress_blank
@params.suppress_blank = false
assert !@params.suppress_blank
end
def test_suppress_nst
@params.suppress_nst = true
assert @params.suppress_nst
@params.suppress_nst = false
assert !@params.suppress_nst
end
def test_token_timestamps
@params.token_timestamps = true
assert @params.token_timestamps
@params.token_timestamps = false
assert !@params.token_timestamps
end
def test_split_on_word
@params.split_on_word = true
assert @params.split_on_word
@params.split_on_word = false
assert !@params.split_on_word
end
def test_initial_prompt
assert_nil @params.initial_prompt
@params.initial_prompt = "You are a polite person."
assert_equal "You are a polite person.", @params.initial_prompt
end
def test_temperature
assert_equal 0.0, @params.temperature
@params.temperature = 0.5
assert_equal 0.5, @params.temperature
end
def test_max_initial_ts
assert_equal 1.0, @params.max_initial_ts
@params.max_initial_ts = 600.0
assert_equal 600.0, @params.max_initial_ts
end
def test_length_penalty
assert_equal -1.0, @params.length_penalty
@params.length_penalty = 0.5
assert_equal 0.5, @params.length_penalty
end
def test_temperature_inc
assert_in_delta 0.2, @params.temperature_inc
@params.temperature_inc = 0.5
assert_in_delta 0.5, @params.temperature_inc
end
def test_entropy_thold
assert_in_delta 2.4, @params.entropy_thold
@params.entropy_thold = 3.0
assert_in_delta 3.0, @params.entropy_thold
end
def test_logprob_thold
assert_in_delta -1.0, @params.logprob_thold
@params.logprob_thold = -0.5
assert_in_delta -0.5, @params.logprob_thold
end
def test_no_speech_thold
assert_in_delta 0.6, @params.no_speech_thold
@params.no_speech_thold = 0.2
assert_in_delta 0.2, @params.no_speech_thold
end
def test_new_with_kw_args
params = Whisper::Params.new(language: "es")
assert_equal "es", params.language
assert_equal 1.0, params.max_initial_ts
end
def test_new_with_kw_args_non_existent
assert_raise ArgumentError do
Whisper::Params.new(non_existent: "value")
end
end
def test_new_with_kw_args_wrong_type
assert_raise TypeError do
Whisper::Params.new(language: 3)
end
end
data(PARAM_NAMES.collect {|param| [param, param]}.to_h)
def test_new_with_kw_args_default_values(param)
default_value = @params.send(param)
value = case [param, default_value]
in [*, true | false]
!default_value
in [*, Integer | Float]
default_value + 1
in [:language, *]
"es"
in [:initial_prompt, *]
"Initial prompt"
in [/_callback\Z/, *]
proc {}
in [/_user_data\Z/, *]
Object.new
end
params = Whisper::Params.new(param => value)
if Float === value
assert_in_delta value, params.send(param)
else
assert_equal value, params.send(param)
end
PARAM_NAMES.reject {|name| name == param}.each do |name|
expected = @params.send(name)
actual = params.send(name)
if Float === expected
assert_in_delta expected, actual
else
assert_equal expected, actual
end
end
end
end

View File

@ -0,0 +1,74 @@
require_relative "helper"
class TestSegment < TestBase
def test_iteration
whisper.each_segment do |segment|
assert_instance_of Whisper::Segment, segment
end
end
def test_enumerator
enum = whisper.each_segment
assert_instance_of Enumerator, enum
enum.to_a.each_with_index do |segment, index|
assert_instance_of Whisper::Segment, segment
assert_kind_of Integer, index
end
end
def test_start_time
i = 0
whisper.each_segment do |segment|
assert_equal 0, segment.start_time if i == 0
i += 1
end
end
def test_end_time
i = 0
whisper.each_segment do |segment|
assert_equal whisper.full_get_segment_t1(i) * 10, segment.end_time
i += 1
end
end
def test_no_speech_prob
no_speech_prob = nil
whisper.each_segment do |segment|
no_speech_prob = segment.no_speech_prob
end
assert no_speech_prob > 0.0
end
def test_on_new_segment
params = Whisper::Params.new
seg = nil
index = 0
params.on_new_segment do |segment|
assert_instance_of Whisper::Segment, segment
if index == 0
seg = segment
assert_equal 0, segment.start_time
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
end
index += 1
end
whisper.transcribe(AUDIO, params)
assert_equal 0, seg.start_time
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
end
def test_on_new_segment_twice
params = Whisper::Params.new
seg = nil
params.on_new_segment do |segment|
seg = segment
return
end
params.on_new_segment do |segment|
assert_same seg, segment
return
end
whisper.transcribe(AUDIO, params)
end
end

View File

@ -1,131 +1,223 @@
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
EXTDIR = File.join(TOPDIR, 'ext')
#$LIBDIR = File.join(TOPDIR, 'lib')
#$:.unshift(LIBDIR)
$:.unshift(EXTDIR)
require_relative "helper"
require "stringio"
require "etc"
require 'whisper'
require 'test/unit'
# Exists to detect memory-related bug
Whisper.log_set ->(level, buffer, user_data) {}, nil
class TestWhisper < Test::Unit::TestCase
class TestWhisper < TestBase
def setup
@params = Whisper::Params.new
end
def test_language
@params.language = "en"
assert_equal @params.language, "en"
@params.language = "auto"
assert_equal @params.language, "auto"
end
def test_offset
@params.offset = 10_000
assert_equal @params.offset, 10_000
@params.offset = 0
assert_equal @params.offset, 0
end
def test_duration
@params.duration = 60_000
assert_equal @params.duration, 60_000
@params.duration = 0
assert_equal @params.duration, 0
end
def test_max_text_tokens
@params.max_text_tokens = 300
assert_equal @params.max_text_tokens, 300
@params.max_text_tokens = 0
assert_equal @params.max_text_tokens, 0
end
def test_translate
@params.translate = true
assert @params.translate
@params.translate = false
assert !@params.translate
end
def test_no_context
@params.no_context = true
assert @params.no_context
@params.no_context = false
assert !@params.no_context
end
def test_single_segment
@params.single_segment = true
assert @params.single_segment
@params.single_segment = false
assert !@params.single_segment
end
def test_print_special
@params.print_special = true
assert @params.print_special
@params.print_special = false
assert !@params.print_special
end
def test_print_progress
@params.print_progress = true
assert @params.print_progress
@params.print_progress = false
assert !@params.print_progress
end
def test_print_realtime
@params.print_realtime = true
assert @params.print_realtime
@params.print_realtime = false
assert !@params.print_realtime
end
def test_print_timestamps
@params.print_timestamps = true
assert @params.print_timestamps
@params.print_timestamps = false
assert !@params.print_timestamps
end
def test_suppress_blank
@params.suppress_blank = true
assert @params.suppress_blank
@params.suppress_blank = false
assert !@params.suppress_blank
end
def test_suppress_non_speech_tokens
@params.suppress_non_speech_tokens = true
assert @params.suppress_non_speech_tokens
@params.suppress_non_speech_tokens = false
assert !@params.suppress_non_speech_tokens
end
def test_token_timestamps
@params.token_timestamps = true
assert @params.token_timestamps
@params.token_timestamps = false
assert !@params.token_timestamps
end
def test_split_on_word
@params.split_on_word = true
assert @params.split_on_word
@params.split_on_word = false
assert !@params.split_on_word
end
def test_whisper
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
@whisper = Whisper::Context.new("base.en")
params = Whisper::Params.new
params.print_timestamps = false
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
@whisper.transcribe(jfk, params) {|text|
@whisper.transcribe(AUDIO, params) {|text|
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
}
end
sub_test_case "After transcription" do
def test_full_n_segments
assert_equal 1, whisper.full_n_segments
end
def test_full_lang_id
assert_equal 0, whisper.full_lang_id
end
def test_full_get_segment
segment = whisper.full_get_segment(0)
assert_equal 0, segment.start_time
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
end
def test_full_get_segment_t0
assert_equal 0, whisper.full_get_segment_t0(0)
assert_raise IndexError do
whisper.full_get_segment_t0(whisper.full_n_segments)
end
assert_raise IndexError do
whisper.full_get_segment_t0(-1)
end
end
def test_full_get_segment_t1
t1 = whisper.full_get_segment_t1(0)
assert_kind_of Integer, t1
assert t1 > 0
assert_raise IndexError do
whisper.full_get_segment_t1(whisper.full_n_segments)
end
end
def test_full_get_segment_speaker_turn_next
assert_false whisper.full_get_segment_speaker_turn_next(0)
end
def test_full_get_segment_text
assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
end
def test_full_get_segment_no_speech_prob
prob = whisper.full_get_segment_no_speech_prob(0)
assert prob > 0.0
assert prob < 1.0
end
end
def test_lang_max_id
assert_kind_of Integer, Whisper.lang_max_id
end
def test_lang_id
assert_equal 0, Whisper.lang_id("en")
assert_raise ArgumentError do
Whisper.lang_id("non existing language")
end
end
def test_lang_str
assert_equal "en", Whisper.lang_str(0)
assert_raise IndexError do
Whisper.lang_str(Whisper.lang_max_id + 1)
end
end
def test_lang_str_full
assert_equal "english", Whisper.lang_str_full(0)
assert_raise IndexError do
Whisper.lang_str_full(Whisper.lang_max_id + 1)
end
end
def test_log_set
user_data = Object.new
logs = []
log_callback = ->(level, buffer, udata) {
logs << [level, buffer, udata]
}
Whisper.log_set log_callback, user_data
Whisper::Context.new("base.en")
assert logs.length > 30
logs.each do |log|
assert_include [Whisper::LOG_LEVEL_DEBUG, Whisper::LOG_LEVEL_INFO, Whisper::LOG_LEVEL_WARN], log[0]
assert_same user_data, log[2]
end
end
def test_log_suppress
stderr = $stderr
Whisper.log_set ->(level, buffer, user_data) {
# do nothing
}, nil
dev = StringIO.new("")
$stderr = dev
Whisper::Context.new("base.en")
assert_empty dev.string
ensure
$stderr = stderr
end
sub_test_case "full" do
def setup
super
@whisper = Whisper::Context.new("base.en")
@samples = File.read(AUDIO, nil, 78).unpack("s<*").collect {|i| i.to_f / 2**15}
end
def test_full
@whisper.full(@params, @samples, @samples.length)
assert_equal 1, @whisper.full_n_segments
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_without_length
@whisper.full(@params, @samples)
assert_equal 1, @whisper.full_n_segments
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_enumerator
samples = @samples.each
@whisper.full(@params, samples, @samples.length)
assert_equal 1, @whisper.full_n_segments
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_enumerator_without_length
samples = @samples.each
assert_raise ArgumentError do
@whisper.full(@params, samples)
end
end
def test_full_enumerator_with_too_large_length
samples = @samples.each.take(10).to_enum
assert_raise StopIteration do
@whisper.full(@params, samples, 11)
end
end
def test_full_with_memory_view
samples = JFKReader.new(AUDIO)
@whisper.full(@params, samples)
assert_equal 1, @whisper.full_n_segments
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
end
def test_full_parallel
@whisper.full_parallel(@params, @samples, @samples.length, Etc.nprocessors)
assert_equal Etc.nprocessors, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_with_memory_view
samples = JFKReader.new(AUDIO)
@whisper.full_parallel(@params, samples, nil, Etc.nprocessors)
assert_equal Etc.nprocessors, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_without_length_and_n_processors
@whisper.full_parallel(@params, @samples)
assert_equal 1, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_without_length
@whisper.full_parallel(@params, @samples, nil, Etc.nprocessors)
assert_equal Etc.nprocessors, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
def test_full_parallel_without_n_processors
@whisper.full_parallel(@params, @samples, @samples.length)
assert_equal 1, @whisper.full_n_segments
text = @whisper.each_segment.collect(&:text).join
assert_match /ask what you can do/i, text
assert_match /for your country/i, text
end
end
end

View File

@ -1,28 +1,36 @@
require_relative "extsources"
Gem::Specification.new do |s|
s.name = "whispercpp"
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
s.version = '1.3.0'
s.date = '2024-05-14'
s.version = '1.3.1'
s.date = '2024-12-19'
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
s.email = 'todd.fisher@gmail.com'
s.extra_rdoc_files = ['LICENSE', 'README.md']
s.files = ["LICENSE", "README.md", "Rakefile", "ext/extconf.rb", "ext/ggml.c", "ext/ruby_whisper.cpp", "ext/whisper.cpp", "ext/dr_wav.h", "ext/ggml.h", "ext/ruby_whisper.h", "ext/whisper.h"]
#### Load-time details
s.require_paths = ['lib','ext']
s.files = `git ls-files . -z`.split("\x0") +
EXTSOURCES.collect {|file|
basename = File.basename(file)
if s.extra_rdoc_files.include?(basename)
basename
else
file.sub("../..", "ext")
end
}
s.summary = %q{Ruby whisper.cpp bindings}
s.test_files = ["tests/test_whisper.rb"]
s.test_files = s.files.select {|file| file.start_with? "tests/"}
s.extensions << 'ext/extconf.rb'
s.required_ruby_version = '>= 3.1.0'
#### Documentation and testing.
s.homepage = 'https://github.com/ggerganov/whisper.cpp'
s.rdoc_options = ['--main', '../../README.md']
s.rdoc_options = ['--main', 'README.md']
s.platform = Gem::Platform::RUBY
s.licenses = ['MIT']
end

519
build-xcframework.sh Executable file
View File

@ -0,0 +1,519 @@
#!/bin/bash
#
# Options
IOS_MIN_OS_VERSION=16.4
MACOS_MIN_OS_VERSION=13.3
VISIONOS_MIN_OS_VERSION=1.0
TVOS_MIN_OS_VERSION=16.4
BUILD_SHARED_LIBS=OFF
WHISPER_BUILD_EXAMPLES=OFF
WHISPER_BUILD_TESTS=OFF
WHISPER_BUILD_SERVER=OFF
GGML_METAL=ON
GGML_METAL_EMBED_LIBRARY=ON
GGML_BLAS_DEFAULT=ON
GGML_METAL_USE_BF16=ON
GGML_OPENMP=OFF
COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
# Common options for all builds
COMMON_CMAKE_ARGS=(
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY=""
-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO
-DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym"
-DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES
-DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO
-DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
-DWHISPER_BUILD_EXAMPLES=${WHISPER_BUILD_EXAMPLES}
-DWHISPER_BUILD_TESTS=${WHISPER_BUILD_TESTS}
-DWHISPER_BUILD_SERVER=${WHISPER_BUILD_SERVER}
-DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY}
-DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT}
-DGGML_METAL=${GGML_METAL}
-DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16}
-DGGML_NATIVE=OFF
-DGGML_OPENMP=${GGML_OPENMP}
)
check_required_tool() {
local tool=$1
local install_message=$2
if ! command -v $tool &> /dev/null; then
echo "Error: $tool is required but not found."
echo "$install_message"
exit 1
fi
}
echo "Checking for required tools..."
check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
set -e
## Clean up previous builds
rm -rf build-apple
rm -rf build-ios-sim
rm -rf build-ios-device
rm -rf build-macos
rm -rf build-visionos
rm -rf build-visionos-sim
rm -rf build-tvos-sim
rm -rf build-tvos-device
# Setup the xcframework build directory structure
setup_framework_structure() {
local build_dir=$1
local min_os_version=$2
local platform=$3 # "ios", "macos", "visionos", or "tvos"
local framework_name="whisper"
echo "Creating ${platform}-style framework structure for ${build_dir}"
if [[ "$platform" == "macos" ]]; then
# macOS versioned structure uses versioned directories
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules
mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources
# Create symbolic links
ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current
ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers
ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules
ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources
ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name}
# Set header and module paths
local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/
local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/
else
# iOS/VisionOS/tvOS use a flat structure
mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers
mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules
# Remove any existing structure to ensure clean build
rm -rf ${build_dir}/framework/${framework_name}.framework/Versions
# Set header and module paths
local header_path=${build_dir}/framework/${framework_name}.framework/Headers/
local module_path=${build_dir}/framework/${framework_name}.framework/Modules/
fi
# Copy all required headers (common for all platforms)
cp include/whisper.h ${header_path}
cp ggml/include/ggml.h ${header_path}
cp ggml/include/ggml-alloc.h ${header_path}
cp ggml/include/ggml-backend.h ${header_path}
cp ggml/include/ggml-metal.h ${header_path}
cp ggml/include/ggml-cpu.h ${header_path}
cp ggml/include/ggml-blas.h ${header_path}
cp ggml/include/gguf.h ${header_path}
# Create module map (common for all platforms)
cat > ${module_path}module.modulemap << EOF
framework module whisper {
header "whisper.h"
header "ggml.h"
header "ggml-alloc.h"
header "ggml-backend.h"
header "ggml-metal.h"
header "ggml-cpu.h"
header "ggml-blas.h"
header "gguf.h"
link "c++"
link framework "Accelerate"
link framework "Metal"
link framework "Foundation"
export *
}
EOF
# Platform-specific settings for Info.plist
local platform_name=""
local sdk_name=""
local supported_platform=""
case "$platform" in
"ios")
platform_name="iphoneos"
sdk_name="iphoneos${min_os_version}"
supported_platform="iPhoneOS"
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
local device_family=' <key>UIDeviceFamily</key>
<array>
<integer>1</integer>
<integer>2</integer>
</array>'
;;
"macos")
platform_name="macosx"
sdk_name="macosx${min_os_version}"
supported_platform="MacOSX"
local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist"
local device_family=""
;;
"visionos")
platform_name="xros"
sdk_name="xros${min_os_version}"
supported_platform="XRPlatform"
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
local device_family=""
;;
"tvos")
platform_name="appletvos"
sdk_name="appletvos${min_os_version}"
supported_platform="AppleTVOS"
local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
local device_family=' <key>UIDeviceFamily</key>
<array>
<integer>3</integer>
</array>'
;;
esac
# Create Info.plist
cat > ${plist_path} << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>whisper</string>
<key>CFBundleIdentifier</key>
<string>org.ggml.whisper</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>whisper</string>
<key>CFBundlePackageType</key>
<string>FMWK</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>MinimumOSVersion</key>
<string>${min_os_version}</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>${supported_platform}</string>
</array>${device_family}
<key>DTPlatformName</key>
<string>${platform_name}</string>
<key>DTSDKName</key>
<string>${sdk_name}</string>
</dict>
</plist>
EOF
}
# Create dynamic libraries from static libraries.
combine_static_libraries() {
local build_dir="$1"
local release_dir="$2"
local platform="$3" # "ios", "macos", "visionos", or "tvos"
local is_simulator="$4"
local base_dir="$(pwd)"
local framework_name="whisper"
# Determine output path based on platform
local output_lib=""
if [[ "$platform" == "macos" ]]; then
# macOS uses versioned structure
output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}"
else
# iOS, visionOS, and tvOS use a directory flat structure
output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}"
fi
local libs=(
"${base_dir}/${build_dir}/src/${release_dir}/libwhisper.a"
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
"${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
"${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
"${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
)
# Create temporary directory for processing
local temp_dir="${base_dir}/${build_dir}/temp"
mkdir -p "${temp_dir}"
# Since we have multiple architectures libtool will find object files that do not
# match the target architecture. We suppress these warnings.
libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null
# Determine SDK, architectures, and install_name based on platform and simulator flag.
local sdk=""
local archs=""
local min_version_flag=""
local install_name=""
case "$platform" in
"ios")
if [[ "$is_simulator" == "true" ]]; then
sdk="iphonesimulator"
archs="arm64 x86_64"
min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}"
else
sdk="iphoneos"
archs="arm64"
min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}"
fi
install_name="@rpath/whisper.framework/whisper"
;;
"macos")
sdk="macosx"
archs="arm64 x86_64"
min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}"
install_name="@rpath/whisper.framework/Versions/Current/whisper"
;;
"visionos")
if [[ "$is_simulator" == "true" ]]; then
sdk="xrsimulator"
archs="arm64 x86_64"
min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator"
else
sdk="xros"
archs="arm64"
min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}"
fi
# Use flat structure for visionOS, same as iOS
install_name="@rpath/whisper.framework/whisper"
;;
"tvos")
if [[ "$is_simulator" == "true" ]]; then
sdk="appletvsimulator"
archs="arm64 x86_64"
min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}"
else
sdk="appletvos"
archs="arm64"
min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}"
fi
install_name="@rpath/whisper.framework/whisper"
;;
esac
# Build architecture flags
local arch_flags=""
for arch in $archs; do
arch_flags+=" -arch $arch"
done
# Create dynamic library
echo "Creating dynamic library for ${platform}."
xcrun -sdk $sdk clang++ -dynamiclib \
-isysroot $(xcrun --sdk $sdk --show-sdk-path) \
$arch_flags \
$min_version_flag \
-Wl,-force_load,"${temp_dir}/combined.a" \
-framework Foundation -framework Metal -framework Accelerate \
-install_name "$install_name" \
-o "${base_dir}/${output_lib}"
# Platform-specific post-processing for device builds
if [[ "$is_simulator" == "false" ]]; then
if command -v vtool &>/dev/null; then
case "$platform" in
"ios")
echo "Marking binary as a framework binary for iOS..."
vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
"visionos")
echo "Marking binary as a framework binary for visionOS..."
vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
"tvos")
echo "Marking binary as a framework binary for tvOS..."
vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
esac
else
echo "Warning: vtool not found. Binary may not pass App Store validation."
fi
fi
echo "Creating properly formatted dSYM..."
# Create a separate directory for dSYMs for all platforms
mkdir -p "${base_dir}/${build_dir}/dSYMs"
# iOS and visionOS style dSYM (flat structure)
if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
# Generate dSYM in the dSYMs directory
xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
# Create a copy of the binary that will be stripped
cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"
# Strip debug symbols from the copy
xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"
# Replace the original with the stripped version
mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
else
# macOS style dSYM
# First strip debug info to a separate file
xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"
# Generate dSYM in the dSYMs directory
xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"
# Replace original binary with stripped version
mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
fi
# Remove any automatically generated dSYM files in the framework structure as they will
# otherwise case Invalid Bundle Structure validation errors.
if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
rm -rf "${base_dir}/${output_lib}.dSYM"
fi
# Clean up
rm -rf "${temp_dir}"
}
echo "Building for iOS simulator..."
cmake -B build-ios-sim -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
-DIOS=ON \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_SYSROOT=iphonesimulator \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-ios-sim --config Release -- -quiet
echo "Building for iOS devices..."
cmake -B build-ios-device -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
-DCMAKE_OSX_SYSROOT=iphoneos \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-ios-device --config Release -- -quiet
echo "Building for macOS..."
cmake -B build-macos -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-macos --config Release -- -quiet
echo "Building for visionOS..."
cmake -B build-visionos -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xros \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-visionos --config Release -- -quiet
echo "Building for visionOS simulator..."
cmake -B build-visionos-sim -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xrsimulator \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-visionos-sim --config Release -- -quiet
# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS)
echo "Building for tvOS simulator..."
cmake -B build-tvos-sim -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_SYSROOT=appletvsimulator \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DGGML_METAL=ON \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-tvos-sim --config Release -- -quiet
echo "Building for tvOS devices..."
cmake -B build-tvos-device -G Xcode \
"${COMMON_CMAKE_ARGS[@]}" \
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_SYSROOT=appletvos \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DGGML_METAL=ON \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-S .
cmake --build build-tvos-device --config Release -- -quiet
# Setup frameworks and copy binaries and headers
echo "Setting up framework structures..."
setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios"
setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios"
setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos"
setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos"
setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos"
setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos"
setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos"
# Create dynamic libraries from static libraries
echo "Creating dynamic libraries from static libraries..."
combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true"
combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false"
combine_static_libraries "build-macos" "Release" "macos" "false"
combine_static_libraries "build-visionos" "Release-xros" "visionos" "false"
combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true"
combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true"
combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
# Create XCFramework with correct debug symbols paths
echo "Creating XCFramework..."
xcodebuild -create-xcframework \
-framework $(pwd)/build-ios-sim/framework/whisper.framework \
-debug-symbols $(pwd)/build-ios-sim/dSYMs/whisper.dSYM \
-framework $(pwd)/build-ios-device/framework/whisper.framework \
-debug-symbols $(pwd)/build-ios-device/dSYMs/whisper.dSYM \
-framework $(pwd)/build-macos/framework/whisper.framework \
-debug-symbols $(pwd)/build-macos/dSYMS/whisper.dSYM \
-framework $(pwd)/build-visionos/framework/whisper.framework \
-debug-symbols $(pwd)/build-visionos/dSYMs/whisper.dSYM \
-framework $(pwd)/build-visionos-sim/framework/whisper.framework \
-debug-symbols $(pwd)/build-visionos-sim/dSYMs/whisper.dSYM \
-framework $(pwd)/build-tvos-device/framework/whisper.framework \
-debug-symbols $(pwd)/build-tvos-device/dSYMs/whisper.dSYM \
-framework $(pwd)/build-tvos-sim/framework/whisper.framework \
-debug-symbols $(pwd)/build-tvos-sim/dSYMs/whisper.dSYM \
-output $(pwd)/build-apple/whisper.xcframework

41
ci/README.md Normal file
View File

@ -0,0 +1,41 @@
# CI
In addition to [Github Actions](https://github.com/ggerganov/whisper.cpp/actions) `whisper.cpp` uses a custom CI framework:
https://github.com/ggml-org/ci
It monitors the `master` branch for new commits and runs the
[ci/run.sh](https://github.com/ggerganov/whisper.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
to cover various hardware architectures, including GPU and Apple Silicon instances.
Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
Only the branches of this repo are monitored for this keyword.
It is a good practice, before publishing changes to execute the full CI locally on your machine:
```bash
mkdir tmp
# CPU-only build
bash ./ci/run.sh ./tmp/results ./tmp/mnt
# with CUDA support
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```
## Environment Variables
The CI script supports several environment variables to control the build:
| Variable | Description |
|----------|-------------|
| `GG_BUILD_CUDA` | Enable NVIDIA CUDA GPU acceleration |
| `GG_BUILD_SYCL` | Enable Intel SYCL acceleration |
| `GG_BUILD_VULKAN` | Enable Vulkan GPU acceleration |
| `GG_BUILD_METAL` | Enable Metal acceleration on Apple Silicon |
| `GG_BUILD_BLAS` | Enable BLAS CPU acceleration |
| `GG_BUILD_OPENVINO` | Enable OpenVINO support |
| `GG_BUILD_COREML` | Enable Core ML support for Apple Neural Engine |
| `GG_BUILD_LOW_PERF` | Limit tests for low-performance hardware |
| `GG_BUILD_TEST_MODELS` | Comma-separated list of models to test (e.g. "tiny.en,tiny,base,medium", defaults to all models unless `GG_BUILD_LOW_PERF` is set) |

333
ci/run.sh Normal file
View File

@ -0,0 +1,333 @@
#!/bin/bash
#
# sample usage:
#
# mkdir tmp
#
# # CPU-only build
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with CUDA support
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
if [ -z "$2" ]; then
echo "usage: $0 <output-dir> <mnt-dir>"
exit 1
fi
mkdir -p "$1"
mkdir -p "$2"
OUT=$(realpath "$1")
MNT=$(realpath "$2")
rm -f "$OUT/*.log"
rm -f "$OUT/*.exit"
rm -f "$OUT/*.md"
sd=`dirname $0`
cd $sd/../
SRC=`pwd`
ALL_MODELS=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
BENCH_N_THREADS=4
BENCH_ENCODER_ONLY=0
BENCH_FLASH_ATTN=0
# check for user-specified models first. if not specified, use fast models
if [ ! -z ${GG_BUILD_TEST_MODELS} ]; then
IFS=',' read -r -a MODELS <<< "${GG_BUILD_TEST_MODELS}"
else
if [ ! -z ${GG_BUILD_LOW_PERF} ]; then
MODELS=( "tiny" "base" "small" )
else
MODELS=("${ALL_MODELS[@]}")
fi
fi
CMAKE_EXTRA="-DWHISPER_FATAL_WARNINGS=ON"
if [ ! -z ${GG_BUILD_CUDA} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
fi
if [ ! -z ${GG_BUILD_SYCL} ]; then
if [ -z ${ONEAPI_ROOT} ]; then
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
echo "source /opt/intel/oneapi/setvars.sh"
exit 1
fi
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
fi
if [ ! -z ${GG_BUILD_OPENVINO} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_OPENVINO=ON"
fi
if [ ! -z ${GG_BUILD_METAL} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
fi
if [ ! -z ${GG_BUILD_VULKAN} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=ON"
fi
if [ ! -z ${GG_BUILD_BLAS} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_BLAS=ON"
fi
if [ ! -z ${GG_BUILD_COREML} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_COREML=ON"
fi
## helpers
# download a file if it does not exist or if it is outdated
function gg_wget {
local out=$1
local url=$2
local cwd=`pwd`
mkdir -p $out
cd $out
# should not re-download if file is the same
wget -nv -N $url
cd $cwd
}
function gg_download_model {
local model_name=$1
local model_file="$MNT/models/ggml-${model_name}.bin"
if [ ! -f ${model_file} ]; then
local cwd=`pwd`
mkdir -p "$MNT/models"
cd "$MNT/models"
bash "$cwd/models/download-ggml-model.sh" ${model_name} .
cd "$cwd"
fi
}
function gg_printf {
printf -- "$@" >> $OUT/README.md
}
# Helper function to check command exit status
function gg_check_last_command_status {
local exit_file=$1
local command_name=$2
local exit_status=$?
echo "$exit_status" > "$exit_file"
if [ $exit_status -ne 0 ]; then
echo "Error: Command $command_name failed with exit status $exit_status"
return 1
fi
return 0
}
# Usage: gg_run <test_name> [additional_args...]
#
# Parameters:
# test_name - Name of the test to run (calls gg_run_<test_name>)
# additional_args - Any additional arguments to pass to the test function (first argument is appended to the log filename)
function gg_run {
ci=$1
if [ $# -gt 1 ]; then
ci="${ci}_${2}"
fi
set -o pipefail
set -x
gg_run_$1 "$@" | tee $OUT/$ci.log
cur=$?
echo "$cur" > $OUT/$ci.exit
set +x
set +o pipefail
gg_sum_$1 "$@"
ret=$((ret | cur))
}
function gg_check_build_requirements {
if ! command -v cmake &> /dev/null; then
gg_printf 'cmake not found, please install'
fi
if ! command -v make &> /dev/null; then
gg_printf 'make not found, please install'
fi
}
## ci
function gg_run_ctest {
mode=$2
cd ${SRC}
rm -rf build-ci-${mode} && mkdir build-ci-${mode} && cd build-ci-${mode}
set -e
gg_check_build_requirements
(time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
}
function gg_sum_ctest {
mode=$2
gg_printf '### %s\n\n' "${ci}"
gg_printf 'Runs ctest in '${mode}' mode\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
gg_printf '```\n'
}
function gg_run_bench {
cd ${SRC}
# set flash attention flag if enabled
fattn=""
if [ "$BENCH_FLASH_ATTN" -eq 1 ]; then
fattn="-fa"
fi
# run memcpy benchmark if not encoder-only mode
if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
echo "Running memcpy benchmark"
(time ./build-ci-release/bin/whisper-bench -w 1 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-memcpy.log
gg_check_last_command_status "$OUT/${ci}-memcpy.exit" "memcpy benchmark"
echo "Running ggml_mul_mat benchmark with $BENCH_N_THREADS threads"
(time ./build-ci-release/bin/whisper-bench -w 2 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-mul_mat.log
gg_check_last_command_status "$OUT/${ci}-mul_mat.exit" "ggml_mul_mat benchmark"
fi
echo "Running benchmark for all models"
# generate header for the benchmark table
{
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "Config" "Model" "Th" "FA" "Enc." "Dec." "Bch5" "PP" "Commit"
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---"
} | tee -a $OUT/${ci}-models-table.log
# run benchmark for each model
for model in "${MODELS[@]}"; do
echo "Benchmarking model: $model"
# run the benchmark and capture output
output=$(./build-ci-release/bin/whisper-bench -m $MNT/models/ggml-$model.bin -t $BENCH_N_THREADS $fattn 2>&1)
ret=$?
# save the raw output
echo "$output" > $OUT/${ci}-bench-$model.log
if [ $ret -eq 0 ]; then
# parse the benchmark results
encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
system_info=$(echo "$output" | grep "system_info")
actual_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
# determine configuration
config=""
if [[ $system_info == *"AVX2 = 1"* ]]; then
config="$config AVX2"
fi
if [[ $system_info == *"NEON = 1"* ]]; then
config="$config NEON"
fi
if [[ $system_info == *"BLAS = 1"* ]]; then
config="$config BLAS"
fi
if [[ $system_info == *"COREML = 1"* ]]; then
config="$config COREML"
fi
if [[ $system_info == *"CUDA = 1"* ]]; then
config="$config CUDA"
fi
if [[ $system_info == *"METAL = 1"* ]]; then
config="$config METAL"
fi
# get commit hash
commit=$(git rev-parse --short HEAD)
# add row to benchmark table
printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" \
"$config" "$model" "$actual_threads" "$BENCH_FLASH_ATTN" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit" \
| tee -a $OUT/${ci}-models-table.log
else
echo "Benchmark failed for model: $model" | tee -a $OUT/${ci}-bench-errors.log
fi
done
}
function gg_sum_bench {
gg_printf '### %s\n\n' "${ci}"
gg_printf 'Whisper Benchmark Results\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
# show memcpy and ggml_mul_mat benchmark results if available
if [ "$BENCH_ENCODER_ONLY" -eq 0 ]; then
if [ -f "$OUT/${ci}-memcpy.log" ]; then
gg_printf '#### memcpy Benchmark\n\n'
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-memcpy.log)"
fi
if [ -f "$OUT/${ci}-mul_mat.log" ]; then
gg_printf '#### ggml_mul_mat Benchmark\n\n'
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-mul_mat.log)"
fi
fi
# show model benchmark results
gg_printf '#### Model Benchmarks\n\n'
if [ -f "$OUT/${ci}-models-table.log" ]; then
gg_printf '%s\n\n' "$(cat $OUT/${ci}-models-table.log)"
else
gg_printf 'No model benchmark results available.\n\n'
fi
# show any errors that occurred
if [ -f "$OUT/${ci}-bench-errors.log" ]; then
gg_printf '#### Benchmark Errors\n\n'
gg_printf '```\n%s\n```\n\n' "$(cat $OUT/${ci}-bench-errors.log)"
fi
}
ret=0
for model in "${MODELS[@]}"; do
test $ret -eq 0 && gg_download_model ${model}
done
test $ret -eq 0 && gg_run ctest debug
test $ret -eq 0 && gg_run ctest release
test $ret -eq 0 && gg_run bench
exit $ret

28
close-issue.yml Normal file
View File

@ -0,0 +1,28 @@
name: Close inactive issues
on:
schedule:
- cron: "42 0 * * *"
# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
issues: write
jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v5
with:
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
days-before-issue-stale: 30
days-before-issue-close: 14
stale-issue-label: "stale"
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
days-before-pr-stale: -1
days-before-pr-close: -1
operations-per-run: 10000
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -13,5 +13,4 @@ set_target_properties(${TARGET}
PROPERTIES
EXPORT_COMPILE_COMMANDS ON
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib"
)

View File

@ -42,6 +42,8 @@ endif()
if(MSVC)
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
else()
execute_process(
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}

View File

@ -1,10 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
libdir=${exec_prefix}/lib
includedir=${prefix}/include
Name: whisper
Description: Port of OpenAI's Whisper model in C/C++
Version: @PROJECT_VERSION@
Libs: -L${libdir} -lwhisper
Libs: -L${libdir} -lggml -lggml-base -lwhisper
Cflags: -I${includedir}

View File

@ -14,10 +14,6 @@ if (WHISPER_SDL2)
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
endif()
if (WHISPER_CLBLAST)
find_package(CLBlast REQUIRED)
endif()
# common
set(TARGET common)
@ -56,6 +52,8 @@ add_library(${TARGET} STATIC
common.cpp
common-ggml.h
common-ggml.cpp
common-whisper.h
common-whisper.cpp
grammar-parser.h
grammar-parser.cpp
${COMMON_SOURCES_FFMPEG}
@ -63,7 +61,7 @@ add_library(${TARGET} STATIC
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS})
target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS} ${CMAKE_DL_LIBS})
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
@ -97,52 +95,29 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
if (EMSCRIPTEN)
add_subdirectory(whisper.wasm)
set_target_properties(libmain PROPERTIES FOLDER "libs")
add_subdirectory(stream.wasm)
set_target_properties(libstream PROPERTIES FOLDER "libs")
add_subdirectory(command.wasm)
set_target_properties(libcommand PROPERTIES FOLDER "libs")
#add_subdirectory(talk.wasm)
#set_target_properties(libtalk PROPERTIES FOLDER "libs")
add_subdirectory(bench.wasm)
set_target_properties(libbench PROPERTIES FOLDER "libs")
elseif(CMAKE_JS_VERSION)
add_subdirectory(addon.node)
set_target_properties(addon.node PROPERTIES FOLDER "examples")
else()
add_subdirectory(main)
set_target_properties(main PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
add_subdirectory(stream)
set_target_properties(stream PROPERTIES FOLDER "examples")
endif (WHISPER_SDL2)
add_subdirectory(server)
set_target_properties(server PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
add_subdirectory(command)
set_target_properties(command PROPERTIES FOLDER "examples")
endif (WHISPER_SDL2)
add_subdirectory(cli)
add_subdirectory(bench)
set_target_properties(bench PROPERTIES FOLDER "examples")
add_subdirectory(server)
add_subdirectory(quantize)
set_target_properties(quantize PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
# TODO: disabled until update
# https://github.com/ggerganov/whisper.cpp/issues/1818
#add_subdirectory(talk)
#set_target_properties(talk PROPERTIES FOLDER "examples")
add_subdirectory(talk-llama)
set_target_properties(talk-llama PROPERTIES FOLDER "examples")
add_subdirectory(lsp)
set_target_properties(lsp PROPERTIES FOLDER "examples")
if (GGML_SYCL)
add_subdirectory(sycl)
set_target_properties(sycl PROPERTIES FOLDER "examples")
endif()
endif (WHISPER_SDL2)
if (WHISPER_SDL2)
add_subdirectory(stream)
add_subdirectory(command)
add_subdirectory(talk-llama)
add_subdirectory(lsp)
if (GGML_SYCL)
add_subdirectory(sycl)
endif()
endif (WHISPER_SDL2)
add_subdirectory(deprecation-warning)
endif()
if (WHISPER_SDL2)
add_subdirectory(wchess)
set_target_properties(wchess PROPERTIES FOLDER "examples")
endif (WHISPER_SDL2)

View File

@ -1,5 +1,6 @@
#include "napi.h"
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
@ -171,8 +172,8 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
// read the input audio file if params.pcmf32 is not provided
if (params.pcmf32.empty()) {
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
continue;
}
} else {
@ -330,6 +331,7 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
bool no_timestamps = whisper_params.Get("no_timestamps").As<Napi::Boolean>();
int32_t audio_ctx = whisper_params.Get("audio_ctx").As<Napi::Number>();
bool comma_in_time = whisper_params.Get("comma_in_time").As<Napi::Boolean>();
int32_t max_len = whisper_params.Get("max_len").As<Napi::Number>();
Napi::Value pcmf32Value = whisper_params.Get("pcmf32");
std::vector<float> pcmf32_vec;
@ -352,6 +354,7 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
params.audio_ctx = audio_ctx;
params.pcmf32 = pcmf32_vec;
params.comma_in_time = comma_in_time;
params.max_len = max_len;
Napi::Function callback = info[1].As<Napi::Function>();
Worker* worker = new Worker(callback, params);

View File

@ -18,6 +18,7 @@ const whisperParams = {
translate: true,
no_timestamps: false,
audio_ctx: 0,
max_len: 0,
};
const arguments = process.argv.slice(2);

View File

@ -1,6 +1,8 @@
set(TARGET bench)
set(TARGET whisper-bench)
add_executable(${TARGET} bench.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ${TARGET} RUNTIME)

View File

@ -1,4 +1,4 @@
# bench
# whisper.cpp/examples/bench
A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of
the transformer on some random audio data and records the execution time. This way we can have an objective comparison
@ -7,11 +7,8 @@ of the performance of the model for various setups.
Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89
```bash
# build the bench tool
$ make bench
# run it on the small.en model using 4 threads
$ ./bench -m ./models/ggml-small.en.bin -t 4
# run the bench too on the small.en model using 4 threads
$ ./build/bin/whisper-bench -m ./models/ggml-small.en.bin -t 4
whisper_model_load: loading model from './models/ggml-small.en.bin'
whisper_model_load: n_vocab = 51864

View File

@ -50,11 +50,11 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, "\n");
}

View File

@ -1,6 +1,8 @@
set(TARGET main)
add_executable(${TARGET} main.cpp)
set(TARGET whisper-cli)
add_executable(${TARGET} cli.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE common whisper ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ${TARGET} RUNTIME)

View File

@ -1,12 +1,12 @@
# main
# whisper.cpp/examples/cli
This is the main example demonstrating most of the functionality of the Whisper model.
It can be used as a reference for using the `whisper.cpp` library in other projects.
```
./main -h
./build/bin/whisper-cli -h
usage: ./main [options] file0.wav file1.wav ...
usage: ./build-pkg/bin/whisper-cli [options] file0.wav file1.wav ...
options:
-h, --help [default] show this help message and exit
@ -20,9 +20,12 @@ options:
-sow, --split-on-word [false ] split on word rather than on token
-bo N, --best-of N [5 ] number of best candidates to keep
-bs N, --beam-size N [5 ] beam size for beam search
-ac N, --audio-ctx N [0 ] audio context size (0 - all)
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
-tp, --temperature N [0.00 ] The sampling temperature, between 0 and 1
-tpi, --temperature-inc N [0.20 ] The increment of temperature, between 0 and 1
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
-tr, --translate [false ] translate from source language to english
-di, --diarize [false ] stereo audio diarization
@ -38,16 +41,23 @@ options:
-oj, --output-json [false ] output result in a JSON file
-ojf, --output-json-full [false ] include more information in the JSON file
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
-np, --no-prints [false ] do not print anything other than the results
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-pp, --print-progress [false ] print progress
-nt, --no-timestamps [false ] do not print timestamps
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
-dl, --detect-language [false ] exit after automatically detecting language
--prompt PROMPT [ ] initial prompt
--prompt PROMPT [ ] initial prompt (max n_text_ctx/2 tokens)
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
-dtw MODEL --dtw MODEL [ ] compute token-level timestamps
-ls, --log-score [false ] log best decoder scores of tokens
-ng, --no-gpu [false ] disable GPU
-fa, --flash-attn [false ] flash attention
--suppress-regex REGEX [ ] regular expression matching tokens to suppress
--grammar GRAMMAR [ ] GBNF grammar to guide decoding
--grammar-rule RULE [ ] top-level GBNF grammar rule name
--grammar-penalty N [100.0 ] scales down logits of nongrammar tokens
```

View File

@ -1,4 +1,5 @@
#include "common.h"
#include "common-whisper.h"
#include "whisper.h"
#include "grammar-parser.h"
@ -6,12 +7,16 @@
#include <cmath>
#include <fstream>
#include <cstdio>
#include <regex>
#include <string>
#include <thread>
#include <vector>
#include <cstring>
#if defined(_WIN32)
#define NOMINMAX
#include <windows.h>
#endif
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
@ -43,6 +48,7 @@ struct whisper_params {
float word_thold = 0.01f;
float entropy_thold = 2.40f;
float logprob_thold = -1.00f;
float no_speech_thold = 0.6f;
float grammar_penalty = 100.0f;
float temperature = 0.0f;
float temperature_inc = 0.2f;
@ -70,6 +76,7 @@ struct whisper_params {
bool log_score = false;
bool use_gpu = true;
bool flash_attn = false;
bool suppress_nst = false;
std::string language = "en";
std::string prompt;
@ -104,6 +111,11 @@ static char * whisper_param_turn_lowercase(char * in){
return in;
}
static char * requires_value_error(const std::string & arg) {
fprintf(stderr, "error: argument %s requires value\n", arg.c_str());
exit(0);
}
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
@ -122,21 +134,23 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
whisper_print_usage(argc, argv, params);
exit(0);
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(argv[++i]); }
else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(argv[++i]); }
else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(argv[++i]); }
else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
else if (arg == "-bo" || arg == "--best-of") { params.best_of = std::stoi(argv[++i]); }
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(argv[++i]); }
else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(argv[++i]); }
else if (arg == "-tp" || arg == "--temperature") { params.temperature = std::stof(argv[++i]); }
else if (arg == "-tpi" || arg == "--temperature-inc") { params.temperature_inc = std::stof(argv[++i]); }
#define ARGV_NEXT (((i + 1) < argc) ? argv[++i] : requires_value_error(arg))
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(ARGV_NEXT); }
else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(ARGV_NEXT); }
else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(ARGV_NEXT); }
else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(ARGV_NEXT); }
else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(ARGV_NEXT); }
else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(ARGV_NEXT); }
else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(ARGV_NEXT); }
else if (arg == "-bo" || arg == "--best-of") { params.best_of = std::stoi(ARGV_NEXT); }
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(ARGV_NEXT); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(ARGV_NEXT); }
else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(ARGV_NEXT); }
else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(ARGV_NEXT); }
else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(ARGV_NEXT); }
else if (arg == "-nth" || arg == "--no-speech-thold") { params.no_speech_thold = std::stof(ARGV_NEXT); }
else if (arg == "-tp" || arg == "--temperature") { params.temperature = std::stof(ARGV_NEXT); }
else if (arg == "-tpi" || arg == "--temperature-inc") { params.temperature_inc = std::stof(ARGV_NEXT); }
else if (arg == "-debug"|| arg == "--debug-mode") { params.debug_mode = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
@ -148,30 +162,31 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
else if (arg == "-olrc" || arg == "--output-lrc") { params.output_lrc = true; }
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
else if (arg == "-fp" || arg == "--font-path") { params.font_path = ARGV_NEXT; }
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
else if (arg == "-oj" || arg == "--output-json") { params.output_jsn = true; }
else if (arg == "-ojf" || arg == "--output-json-full"){ params.output_jsn_full = params.output_jsn = true; }
else if (arg == "-of" || arg == "--output-file") { params.fname_out.emplace_back(argv[++i]); }
else if (arg == "-of" || arg == "--output-file") { params.fname_out.emplace_back(ARGV_NEXT); }
else if (arg == "-np" || arg == "--no-prints") { params.no_prints = true; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
else if (arg == "-l" || arg == "--language") { params.language = whisper_param_turn_lowercase(argv[++i]); }
else if (arg == "-l" || arg == "--language") { params.language = whisper_param_turn_lowercase(ARGV_NEXT); }
else if (arg == "-dl" || arg == "--detect-language") { params.detect_language = true; }
else if ( arg == "--prompt") { params.prompt = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
else if (arg == "-dtw" || arg == "--dtw") { params.dtw = argv[++i]; }
else if ( arg == "--prompt") { params.prompt = ARGV_NEXT; }
else if (arg == "-m" || arg == "--model") { params.model = ARGV_NEXT; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(ARGV_NEXT); }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = ARGV_NEXT; }
else if (arg == "-dtw" || arg == "--dtw") { params.dtw = ARGV_NEXT; }
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
else if ( arg == "--suppress-regex") { params.suppress_regex = argv[++i]; }
else if ( arg == "--grammar") { params.grammar = argv[++i]; }
else if ( arg == "--grammar-rule") { params.grammar_rule = argv[++i]; }
else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
else if (arg == "-sns" || arg == "--suppress-nst") { params.suppress_nst = true; }
else if ( arg == "--suppress-regex") { params.suppress_regex = ARGV_NEXT; }
else if ( arg == "--grammar") { params.grammar = ARGV_NEXT; }
else if ( arg == "--grammar-rule") { params.grammar_rule = ARGV_NEXT; }
else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(ARGV_NEXT); }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
@ -184,7 +199,8 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
fprintf(stderr, "usage: %s [options] file0 file1 ...\n", argv[0]);
fprintf(stderr, "supported audio formats: flac, mp3, ogg, wav\n");
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
@ -202,6 +218,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
fprintf(stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n", params.entropy_thold);
fprintf(stderr, " -lpt N, --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n", params.logprob_thold);
fprintf(stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n", params.no_speech_thold);
fprintf(stderr, " -tp, --temperature N [%-7.2f] The sampling temperature, between 0 and 1\n", params.temperature);
fprintf(stderr, " -tpi, --temperature-inc N [%-7.2f] The increment of temperature, between 0 and 1\n",params.temperature_inc);
fprintf(stderr, " -debug, --debug-mode [%-7s] enable debug mode (eg. dump log_mel)\n", params.debug_mode ? "true" : "false");
@ -228,12 +245,13 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
fprintf(stderr, " -dl, --detect-language [%-7s] exit after automatically detecting language\n", params.detect_language ? "true" : "false");
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt (max n_text_ctx/2 tokens)\n", params.prompt.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input audio file path\n", "");
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n", params.suppress_nst ? "true" : "false");
fprintf(stderr, " --suppress-regex REGEX [%-7s] regular expression matching tokens to suppress\n", params.suppress_regex.c_str());
fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str());
fprintf(stderr, " --grammar-rule RULE [%-7s] top-level GBNF grammar rule name\n", params.grammar_rule.c_str());
@ -904,6 +922,13 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
static void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
int main(int argc, char ** argv) {
#if defined(_WIN32)
// Set the console output code page to UTF-8, while command line arguments
// are still encoded in the system's code page. In this way, we can print
// non-ASCII characters to the console, and access files with non-ASCII paths.
SetConsoleOutputCP(CP_UTF8);
#endif
whisper_params params;
// If the only argument starts with "@", read arguments line-by-line
@ -997,6 +1022,7 @@ int main(int argc, char ** argv) {
if (params.dtw == "large.v1") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V1;
if (params.dtw == "large.v2") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V2;
if (params.dtw == "large.v3") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3;
if (params.dtw == "large.v3.turbo") cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3_TURBO;
if (cparams.dtw_aheads_preset == WHISPER_AHEADS_NONE) {
fprintf(stderr, "error: unknown DTW preset '%s'\n", params.dtw.c_str());
@ -1044,8 +1070,8 @@ int main(int argc, char ** argv) {
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
continue;
}
@ -1120,9 +1146,12 @@ int main(int argc, char ** argv) {
wparams.entropy_thold = params.entropy_thold;
wparams.logprob_thold = params.logprob_thold;
wparams.no_speech_thold = params.no_speech_thold;
wparams.no_timestamps = params.no_timestamps;
wparams.suppress_nst = params.suppress_nst;
whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
const auto & grammar_parsed = params.grammar_parsed;

View File

@ -1,9 +1,10 @@
if (WHISPER_SDL2)
# command
set(TARGET command)
set(TARGET whisper-command)
add_executable(${TARGET} command.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ${TARGET} RUNTIME)
endif ()

View File

@ -1,14 +1,14 @@
# command
# whisper.cpp/examples/command
This is a basic Voice Assistant example that accepts voice commands from the microphone.
More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/issues/171).
```bash
# Run with default arguments and small model
./command -m ./models/ggml-small.en.bin -t 8
./whisper-command -m ./models/ggml-small.en.bin -t 8
# On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
./command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
./whisper-command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
```
https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
@ -23,10 +23,10 @@ Initial tests show that this approach might be extremely efficient in terms of p
```bash
# Run in guided mode, the list of allowed commands is in commands.txt
./command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt
./whisper-command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt
# On Raspberry Pi, in guided mode you can use "-ac 128" for extra performance
./command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
./whisper-command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
```
https://user-images.githubusercontent.com/1991296/207435352-8fc4ed3f-bde5-4555-9b8b-aeeb76bee969.mp4
@ -34,7 +34,7 @@ https://user-images.githubusercontent.com/1991296/207435352-8fc4ed3f-bde5-4555-9
## Building
The `command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
The `whisper-command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
```bash
# Install SDL2
@ -47,5 +47,6 @@ sudo dnf install SDL2 SDL2-devel
# Install SDL2 on Mac OS
brew install sdl2
make command
cmake -B build -DWHISPER_SDL2=ON
cmake --build build --config Release
```

View File

@ -11,16 +11,15 @@
#include "whisper.h"
#include "grammar-parser.h"
#include <sstream>
#include <cassert>
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <fstream>
#include <mutex>
#include <regex>
#include <map>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include <map>
// command-line parameters
struct whisper_params {

View File

@ -72,9 +72,6 @@ bool ggml_common_quantize_0(
case GGML_FTYPE_MOSTLY_IQ4_XS:
case GGML_FTYPE_MOSTLY_IQ1_M:
case GGML_FTYPE_MOSTLY_BF16:
case GGML_FTYPE_MOSTLY_Q4_0_4_4:
case GGML_FTYPE_MOSTLY_Q4_0_4_8:
case GGML_FTYPE_MOSTLY_Q4_0_8_8:
{
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
return false;
@ -212,9 +209,6 @@ bool ggml_common_quantize_0(
case GGML_TYPE_IQ4_XS:
case GGML_TYPE_IQ1_M:
case GGML_TYPE_BF16:
case GGML_TYPE_Q4_0_4_4:
case GGML_TYPE_Q4_0_4_8:
case GGML_TYPE_Q4_0_8_8:
case GGML_TYPE_TQ1_0:
case GGML_TYPE_TQ2_0:
case GGML_TYPE_COUNT:

View File

@ -1,5 +1,7 @@
#include "common-sdl.h"
#include <cstdio>
audio_async::audio_async(int len_ms) {
m_len_ms = len_ms;
@ -157,15 +159,11 @@ void audio_async::callback(uint8_t * stream, int len) {
memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = m_audio.size();
} else {
memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
}
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
}
}

172
examples/common-whisper.cpp Normal file
View File

@ -0,0 +1,172 @@
#define _USE_MATH_DEFINES // for M_PI
#include "common-whisper.h"
#include "common.h"
#include "whisper.h"
// third-party utilities
// use your favorite implementations
#define STB_VORBIS_HEADER_ONLY
#include "stb_vorbis.c" /* Enables Vorbis decoding. */
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#endif
#define MA_NO_DEVICE_IO
#define MA_NO_THREADING
#define MA_NO_ENCODING
#define MA_NO_GENERATION
#define MA_NO_RESOURCE_MANAGER
#define MA_NO_NODE_GRAPH
#define MINIAUDIO_IMPLEMENTATION
#include "miniaudio.h"
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
#include <cstring>
#include <fstream>
#ifdef WHISPER_FFMPEG
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
#endif
bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
std::vector<uint8_t> audio_data; // used for pipe input from stdin or ffmpeg decoding output
ma_result result;
ma_decoder_config decoder_config;
ma_decoder decoder;
decoder_config = ma_decoder_config_init(ma_format_f32, stereo ? 2 : 1, WHISPER_SAMPLE_RATE);
if (fname == "-") {
#ifdef _WIN32
_setmode(_fileno(stdin), _O_BINARY);
#endif
uint8_t buf[1024];
while (true)
{
const size_t n = fread(buf, 1, sizeof(buf), stdin);
if (n == 0) {
break;
}
audio_data.insert(audio_data.end(), buf, buf + n);
}
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
fprintf(stderr, "Error: failed to open audio data from stdin (%s)\n", ma_result_description(result));
return false;
}
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, audio_data.size());
}
else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
#if defined(WHISPER_FFMPEG)
if (ffmpeg_decode_audio(fname, audio_data) != 0) {
fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());
return false;
}
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
return false;
}
#else
if ((result = ma_decoder_init_memory(fname.c_str(), fname.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
return false;
}
#endif
}
ma_uint64 frame_count;
ma_uint64 frames_read;
if ((result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to retrieve the length of the audio data (%s)\n", ma_result_description(result));
return false;
}
pcmf32.resize(stereo ? frame_count*2 : frame_count);
if ((result = ma_decoder_read_pcm_frames(&decoder, pcmf32.data(), frame_count, &frames_read)) != MA_SUCCESS) {
fprintf(stderr, "error: failed to read the frames of the audio data (%s)\n", ma_result_description(result));
return false;
}
if (stereo) {
pcmf32s.resize(2);
pcmf32s[0].resize(frame_count);
pcmf32s[1].resize(frame_count);
for (uint64_t i = 0; i < frame_count; i++) {
pcmf32s[0][i] = pcmf32[2*i];
pcmf32s[1][i] = pcmf32[2*i + 1];
}
}
ma_decoder_uninit(&decoder);
return true;
}
// 500 -> 00:05.000
// 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma) {
int64_t msec = t * 10;
int64_t hr = msec / (1000 * 60 * 60);
msec = msec - hr * (1000 * 60 * 60);
int64_t min = msec / (1000 * 60);
msec = msec - min * (1000 * 60);
int64_t sec = msec / 1000;
msec = msec - sec * 1000;
char buf[32];
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
return std::string(buf);
}
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
}
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id) {
std::ofstream speak_file(path.c_str());
if (speak_file.fail()) {
fprintf(stderr, "%s: failed to open speak_file\n", __func__);
return false;
} else {
speak_file.write(text.c_str(), text.size());
speak_file.close();
int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
if (ret != 0) {
fprintf(stderr, "%s: failed to speak\n", __func__);
return false;
}
}
return true;
}
#undef STB_VORBIS_HEADER_ONLY
#include "stb_vorbis.c"

24
examples/common-whisper.h Normal file
View File

@ -0,0 +1,24 @@
#pragma once
#include <string>
#include <vector>
#include <cstdint>
// Read WAV audio file and store the PCM data into pcmf32
// fname can be a buffer of WAV data instead of a filename
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_audio_data(
const std::string & fname,
std::vector<float> & pcmf32,
std::vector<std::vector<float>> & pcmf32s,
bool stereo);
// convert timestamp to string, 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma = false);
// given a timestamp get the sample
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
// write text to file, and call system("command voice_id file")
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);

View File

@ -2,33 +2,18 @@
#include "common.h"
// third-party utilities
// use your favorite implementations
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include <cmath>
#include <codecvt>
#include <cstring>
#include <fstream>
#include <regex>
#include <locale>
#include <codecvt>
#include <regex>
#include <sstream>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
#ifdef WHISPER_FFMPEG
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
#endif
// Function to check if the next argument exists
static std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
if (i + 1 < argc && argv[i + 1][0] != '-') {
@ -624,129 +609,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
}
bool is_wav_buffer(const std::string buf) {
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
if (buf.size() < 12 || buf.substr(0, 4) != "RIFF" || buf.substr(8, 4) != "WAVE") {
return false;
}
uint32_t chunk_size = *reinterpret_cast<const uint32_t*>(buf.data() + 4);
if (chunk_size + 8 != buf.size()) {
return false;
}
return true;
}
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
drwav wav;
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
if (fname == "-") {
{
#ifdef _WIN32
_setmode(_fileno(stdin), _O_BINARY);
#endif
uint8_t buf[1024];
while (true)
{
const size_t n = fread(buf, 1, sizeof(buf), stdin);
if (n == 0) {
break;
}
wav_data.insert(wav_data.end(), buf, buf + n);
}
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to open WAV file from stdin\n");
return false;
}
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
}
else if (is_wav_buffer(fname)) {
if (drwav_init_memory(&wav, fname.c_str(), fname.size(), nullptr) == false) {
fprintf(stderr, "error: failed to open WAV file from fname buffer\n");
return false;
}
}
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
#if defined(WHISPER_FFMPEG)
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
return false;
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to read wav data as wav \n");
return false;
}
#else
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
return false;
#endif
}
if (wav.channels != 1 && wav.channels != 2) {
fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
drwav_uninit(&wav);
return false;
}
if (stereo && wav.channels != 2) {
fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
drwav_uninit(&wav);
return false;
}
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
drwav_uninit(&wav);
return false;
}
if (wav.bitsPerSample != 16) {
fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
drwav_uninit(&wav);
return false;
}
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
std::vector<int16_t> pcm16;
pcm16.resize(n*wav.channels);
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
drwav_uninit(&wav);
// convert to mono, float
pcmf32.resize(n);
if (wav.channels == 1) {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[i])/32768.0f;
}
} else {
for (uint64_t i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
}
}
if (stereo) {
// convert to stereo, float
pcmf32s.resize(2);
pcmf32s[0].resize(n);
pcmf32s[1].resize(n);
for (uint64_t i = 0; i < n; i++) {
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
}
}
return true;
}
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
const float rc = 1.0f / (2.0f * M_PI * cutoff);
const float dt = 1.0f / sample_rate;
@ -822,90 +684,7 @@ float similarity(const std::string & s0, const std::string & s1) {
return 1.0f - (dist / std::max(s0.size(), s1.size()));
}
bool sam_params_parse(int argc, char ** argv, sam_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-s" || arg == "--seed") {
params.seed = std::stoi(argv[++i]);
} else if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-m" || arg == "--model") {
params.model = argv[++i];
} else if (arg == "-i" || arg == "--inp") {
params.fname_inp = argv[++i];
} else if (arg == "-o" || arg == "--out") {
params.fname_out = argv[++i];
} else if (arg == "-h" || arg == "--help") {
sam_print_usage(argc, argv, params);
exit(0);
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
sam_print_usage(argc, argv, params);
exit(0);
}
}
return true;
}
void sam_print_usage(int /*argc*/, char ** argv, const sam_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help show this help message and exit\n");
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n");
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
fprintf(stderr, " -i FNAME, --inp FNAME\n");
fprintf(stderr, " input file (default: %s)\n", params.fname_inp.c_str());
fprintf(stderr, " -o FNAME, --out FNAME\n");
fprintf(stderr, " output file (default: %s)\n", params.fname_out.c_str());
fprintf(stderr, "\n");
}
// 500 -> 00:05.000
// 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma) {
int64_t msec = t * 10;
int64_t hr = msec / (1000 * 60 * 60);
msec = msec - hr * (1000 * 60 * 60);
int64_t min = msec / (1000 * 60);
msec = msec - min * (1000 * 60);
int64_t sec = msec / 1000;
msec = msec - sec * 1000;
char buf[32];
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
return std::string(buf);
}
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {
return std::max(0, std::min((int) n_samples - 1, (int) ((t*whisper_sample_rate)/100)));
}
bool is_file_exist(const char *fileName)
{
std::ifstream infile(fileName);
bool is_file_exist(const char * filename) {
std::ifstream infile(filename);
return infile.good();
}
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id)
{
std::ofstream speak_file(path.c_str());
if (speak_file.fail()) {
fprintf(stderr, "%s: failed to open speak_file\n", __func__);
return false;
} else {
speak_file.write(text.c_str(), text.size());
speak_file.close();
int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
if (ret != 0) {
fprintf(stderr, "%s: failed to speak\n", __func__);
return false;
}
}
return true;
}

View File

@ -11,8 +11,6 @@
#include <fstream>
#include <sstream>
#define COMMON_SAMPLE_RATE 16000
//
// GPT CLI argument parsing
//
@ -136,19 +134,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
// Audio utils
//
// Check if a buffer is a WAV audio file
bool is_wav_buffer(const std::string buf);
// Read WAV audio file and store the PCM data into pcmf32
// fname can be a buffer of WAV data instead of a filename
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_wav(
const std::string & fname,
std::vector<float> & pcmf32,
std::vector<std::vector<float>> & pcmf32s,
bool stereo);
// Write PCM data into WAV audio file
class wav_writer {
private:
@ -266,23 +251,6 @@ bool vad_simple(
// compute similarity between two strings using Levenshtein distance
float similarity(const std::string & s0, const std::string & s1);
//
// SAM argument parsing
//
struct sam_params {
int32_t seed = -1; // RNG seed
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
std::string model = "models/sam-vit-b/ggml-model-f16.bin"; // model path
std::string fname_inp = "img.jpg";
std::string fname_out = "img.out";
};
bool sam_params_parse(int argc, char ** argv, sam_params & params);
void sam_print_usage(int argc, char ** argv, const sam_params & params);
//
// Terminal utils
//
@ -330,14 +298,5 @@ const std::vector<std::string> k_colors = {
// Other utils
//
// convert timestamp to string, 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma = false);
// given a timestamp get the sample
int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
// check if file exists using ifstream
bool is_file_exist(const char *fileName);
// write text to file, and call system("command voice_id file")
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);
bool is_file_exist(const char * filename);

View File

@ -0,0 +1,4 @@
add_executable(main ./deprecation-warning.cpp)
add_executable(bench ./deprecation-warning.cpp)
add_executable(stream ./deprecation-warning.cpp)
add_executable(command ./deprecation-warning.cpp)

View File

@ -0,0 +1,17 @@
# Migration notice for binary filenames
> [!IMPORTANT]
[2024 Dec 20] Binaries have been renamed w/ a `whisper-` prefix. `main` is now `whisper-cli`, `server` is `whisper-server`, etc (https://github.com/ggerganov/whisper.cpp/pull/2648)
This migration was important, but it is a breaking change that may not always be immediately obvious to users.
Please update all scripts and workflows to use the new binary names.
| Old Filename | New Filename |
| ---- | ---- |
| main | whisper-cli |
| bench | whisper-bench |
| stream | whisper-stream |
| command | whisper-command |
| server | whisper-server |
| talk-llama | whisper-talk-llama |

View File

@ -0,0 +1,38 @@
// Warns users that this filename was deprecated, and provides a link for more information.
#include <cstdio>
#include <string>
// Main
int main(int argc, char** argv) {
std::string filename = "main";
if (argc >= 1) {
filename = argv[0];
}
// Get only the program name from the full path
size_t pos = filename.find_last_of("/\\");
if (pos != std::string::npos) {
filename = filename.substr(pos+1);
}
// Append "whisper-" to the beginning of filename to get the replacemnt filename
std::string replacement_filename = "whisper-" + filename;
// The exception is if the filename is "main", then our replacement filename is "whisper-cli"
if (filename == "main") {
replacement_filename = "whisper-cli";
}
if (filename == "main.exe") {
replacement_filename = "whisper-cli.exe";
}
fprintf(stdout, "\n");
fprintf(stdout, "WARNING: The binary '%s' is deprecated.\n", filename.c_str());
fprintf(stdout, " Please use '%s' instead.\n", replacement_filename.c_str());
fprintf(stdout, " See https://github.com/ggerganov/whisper.cpp/tree/master/examples/deprecation-warning/README.md for more information.\n");
fprintf(stdout, "\n");
return EXIT_FAILURE;
}

File diff suppressed because it is too large Load Diff

View File

@ -204,8 +204,6 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
const size_t errbuffsize = 1024;
char errbuff[errbuffsize];
av_register_all(); // from avformat. Still a must-have call for ffmpeg v3! (can be skipped for later versions)
fmt_ctx = avformat_alloc_context();
avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ);
LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ);

View File

@ -11,7 +11,7 @@
# Press Ctrl+C to stop recording
#
executable="./main"
executable="./build/bin/whisper-cli"
model="base.en"
model_path="models/ggml-$model.bin"
@ -41,20 +41,17 @@ fi
# record some raw audio
sox -d rec.wav
# resample to 16kHz
ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>&1
# run Whisper
echo "Processing ..."
./main -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1
${executable} -m models/ggml-base.en.bin rec.wav -owts > /dev/null 2>&1
# generate Karaoke video
echo "Generating video ..."
source rec16.wav.wts > /dev/null 2>&1
source rec.wav.wts > /dev/null 2>&1
# play the video
echo "Playing ./rec16.wav.mp4 ..."
ffplay -loglevel 0 -autoexit ./rec16.wav.mp4
ffplay -loglevel 0 -autoexit ./rec.wav.mp4
echo "Done"
exit 0

View File

@ -14,7 +14,7 @@ model="base.en"
check_requirements()
{
if ! command -v ./main &>/dev/null; then
if ! command -v ./build/bin/whisper-cli &>/dev/null; then
echo "whisper.cpp main executable is required (make)"
exit 1
fi
@ -100,7 +100,7 @@ while [ $running -eq 1 ]; do
err=$(cat /tmp/whisper-live.err | wc -l)
done
./main -t 8 -m ./models/ggml-${model}.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
./build/bin/whisper-cli -t 8 -m ./models/ggml-${model}.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
while [ $SECONDS -lt $((($i+1)*$step_s)) ]; do
sleep 1
@ -109,4 +109,4 @@ while [ $running -eq 1 ]; do
done
killall -v ffmpeg
killall -v main
killall -v whisper-cli

View File

@ -3,14 +3,15 @@
#include "whisper.h"
#include "json.hpp"
#include <iostream>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <deque>
#include <iostream>
#include <set>
#include <string>
#include <thread>
#include <vector>
#include <deque>
#include <set>
using json = nlohmann::json;
@ -181,7 +182,7 @@ static json unguided_transcription(struct whisper_context * ctx, audio_async &au
wparams.n_threads = params.n_threads;
wparams.audio_ctx = params.audio_ctx;
wparams.suppress_non_speech_tokens = true;
wparams.suppress_nst = true;
// run the transformer and a single decoding pass
if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
fprintf(stderr, "%s: ERROR: whisper_full() failed\n", __func__);
@ -225,7 +226,7 @@ static json guided_transcription(struct whisper_context * ctx, audio_async &audi
wparams.prompt_tokens = cs.prompt_tokens.data();
wparams.prompt_n_tokens = cs.prompt_tokens.size();
// TODO: properly expose as option
wparams.suppress_non_speech_tokens = true;
wparams.suppress_nst = true;
// run the transformer and a single decoding pass
if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {

93468
examples/miniaudio.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
set(TARGET server)
set(TARGET whisper-server)
add_executable(${TARGET} server.cpp httplib.h)
include(DefaultTargetOptions)
@ -8,3 +8,5 @@ target_link_libraries(${TARGET} PRIVATE common json_cpp whisper ${CMAKE_THREAD_L
if (WIN32)
target_link_libraries(${TARGET} PRIVATE ws2_32)
endif()
install(TARGETS ${TARGET} RUNTIME)

Some files were not shown because too many files have changed in this diff Show More