2024-06-26 18:34:09 +02:00
# Targets built by default. The list is assembled first so that the optional
# sections further down can append to it before the `default` rule uses it.
BUILD_TARGETS = main bench quantize server tests/test-c.o

# Binaries only useful for tests
TEST_TARGETS = tests/test-backend-ops
# Deprecation aliases
#
# WHISPER_CUBLAS is not supported any more: abort and name its replacement.
ifdef WHISPER_CUBLAS
$(error WHISPER_CUBLAS is removed. Use GGML_CUDA instead.)
endif

# Every other legacy WHISPER_<name> switch turns on GGML_<name> and sets
# DEPRECATE_WARNING so that a deprecation notice is printed later on.
#
# NO_LLAMAFILE is listed in addition to NO_WHISPERFILE because the deprecation
# notice printed by this Makefile advertises WHISPER_NO_LLAMAFILE, which was
# previously ignored.
# NOTE(review): nothing in the visible part of this Makefile reads
# GGML_NO_WHISPERFILE or GGML_NO_LLAMAFILE - confirm which spelling is intended.
DEPRECATED_WHISPER_OPTIONS := \
    CUDA KOMPUTE METAL OPENMP RPC SYCL SYCL_F16 \
    OPENBLAS OPENBLAS64 BLIS \
    NO_WHISPERFILE NO_LLAMAFILE NO_ACCELERATE NO_OPENMP NO_METAL

# $(1): option name without the WHISPER_ / GGML_ prefix
define DEPRECATED_WHISPER_ALIAS
ifdef WHISPER_$(1)
GGML_$(1) := 1
DEPRECATE_WARNING := 1
endif
endef

$(foreach opt,$(DEPRECATED_WHISPER_OPTIONS),$(eval $(call DEPRECATED_WHISPER_ALIAS,$(opt))))
2023-04-30 11:14:33 +02:00
2022-11-02 07:46:49 +01:00
# Host identification: any value that is already set is kept, uname is only
# asked for the missing ones.
ifndef UNAME_S
  UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
  UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
  UNAME_M := $(shell uname -m)
endif
2022-10-05 19:41:35 +02:00
2024-04-28 23:54:21 +02:00
# GNU make's built-in CXX is g++ rather than c++. Substitute the generic
# cc / c++ names so users of non-gcc compilers need no g++ alias or wrapper.
# A CC / CXX whose origin is not "default" is left untouched.
DEFCC  := cc
DEFCXX := c++

ifeq ($(origin CC),default)
  CC := $(DEFCC)
endif

ifeq ($(origin CXX),default)
  CXX := $(DEFCXX)
endif
2022-10-19 01:01:53 +02:00
# macOS defaults: Metal on (unless GGML_NO_METAL is set), OpenMP off.
#
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
  ifndef GGML_NO_METAL
    GGML_METAL := 1
  endif

  GGML_NO_OPENMP := 1

  ifneq ($(UNAME_P),arm)
    # Ask the kernel whether the hardware is really arm64.
    SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
    ifeq ($(SYSCTL_M),1)
      # The mismatch is only reported, not corrected:
      # UNAME_P := arm
      # UNAME_M := arm64
      warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
    endif
  endif
endif
2024-06-26 18:34:09 +02:00
# Metal builds always embed the Metal library.
ifdef GGML_METAL
  GGML_METAL_EMBED_LIBRARY := 1
endif

# The RPC backend adds the rpc-server binary to the default build.
ifdef GGML_RPC
  BUILD_TARGETS += rpc-server
endif
# The SDL2-based examples join the default build only when sdl2-config
# prints something (i.e. SDL2 is available).
ifneq ($(shell sdl2-config --cflags --libs 2>/dev/null),)
  BUILD_TARGETS += command stream lsp talk talk-llama
endif
# None of these names is a file produced by the build.
.PHONY: default test all

default: $(BUILD_TARGETS)

# Run every test binary in turn. A failing test does not stop the loop; the
# number of failures is reported at the end and the recipe exits non-zero if
# at least one test failed.
test: $(TEST_TARGETS)
	@failures=0; \
	for test_target in $(TEST_TARGETS); do \
		echo "Running test $$test_target..."; \
		if "./$$test_target"; then \
			printf 'Test %s passed.\n\n' "$$test_target"; \
		else \
			printf 'Test %s FAILED!\n\n' "$$test_target"; \
			failures=$$(( failures + 1 )); \
		fi; \
	done; \
	if [ $$failures -gt 0 ]; then \
		printf '\n%s tests failed.\n' $$failures; \
		exit 1; \
	fi
	@echo 'All tests passed.'

all: $(BUILD_TARGETS) $(TEST_TARGETS)
# Cross-compiling for RISC-V: switch both compilers to the riscv64 toolchain.
ifdef RISCV_CROSS_COMPILE
  CC  := riscv64-unknown-linux-gnu-gcc
  CXX := riscv64-unknown-linux-gnu-g++
endif
2022-10-05 19:41:35 +02:00
#
# Compile flags
#

# The language standards stay pinned at C11 and C++11.
MK_CPPFLAGS  = -Iggml/include -Iggml/src -Iinclude -Isrc -Iexamples
MK_CFLAGS    = -std=c11 -fPIC
MK_CXXFLAGS  = -std=c++11 -fPIC
MK_NVCCFLAGS = -std=c++11
# Route both compilers through ccache when it is installed, unless the user
# opted out with WHISPER_NO_CCACHE.
ifndef WHISPER_NO_CCACHE
  CCACHE := $(shell which ccache)
  ifdef CCACHE
    export CCACHE_SLOPPINESS = time_macros
    $(info I ccache found, compilation results will be cached. Disable with WHISPER_NO_CCACHE.)
    CC  := $(CCACHE) $(CC)
    CXX := $(CCACHE) $(CXX)
  else
    $(info I ccache not found. Consider installing it for faster compilation.)
  endif # CCACHE
endif # WHISPER_NO_CCACHE
2024-02-09 16:26:29 +01:00
2023-09-07 11:36:14 +02:00
# Baseline feature-test macro. What it needs to expose:
#   clock_gettime    came in POSIX.1b (1993)
#   CLOCK_MONOTONIC  came in POSIX.1-2001 / SUSv3 as optional
#   posix_memalign   came in POSIX.1-2001 / SUSv3
#   M_PI             is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
MK_CPPFLAGS += -D_XOPEN_SOURCE=600

# Somehow in OpenBSD whenever POSIX conformance is specified
# some string functions rely on locale_t availability,
# which was introduced in POSIX.1-2008, forcing us to go higher
ifeq ($(UNAME_S),OpenBSD)
  MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
endif

# Data types, macros and functions related to controlling CPU affinity and
# some memory allocation are available on Linux through GNU extensions in libc
ifeq ($(UNAME_S),Linux)
  MK_CPPFLAGS += -D_GNU_SOURCE
endif
2023-03-22 19:51:42 +01:00
2023-06-28 21:34:50 +02:00
# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
# and on macOS its availability depends on enabling Darwin extensions
ifeq ($(UNAME_S),Darwin)
  MK_CPPFLAGS += -D_DARWIN_C_SOURCE
endif

# Two reasons share the same macro:
#  - DragonFly: enabling BSD extensions is likewise necessary for RLIMIT_MEMLOCK
#  - FreeBSD:   alloca is a non-standard interface that is not visible on BSDs
#               when POSIX conformance is specified, and not all of them
#               provide a clean way to enable it in such cases
ifneq (,$(filter $(UNAME_S),DragonFly FreeBSD))
  MK_CPPFLAGS += -D__BSD_VISIBLE
endif

# alloca visibility on the remaining BSDs
ifeq ($(UNAME_S),NetBSD)
  MK_CPPFLAGS += -D_NETBSD_SOURCE
endif

ifeq ($(UNAME_S),OpenBSD)
  MK_CPPFLAGS += -D_BSD_SOURCE
endif
# Forward a user-chosen GGML_SCHED_MAX_COPIES to the preprocessor.
ifdef GGML_SCHED_MAX_COPIES
  MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
endif

# Optimisation level: -O3 with NDEBUG by default, -O0 -g when WHISPER_DEBUG is set.
ifndef WHISPER_DEBUG
  MK_CPPFLAGS  += -DNDEBUG
  MK_CFLAGS    += -O3
  MK_CXXFLAGS  += -O3
  MK_NVCCFLAGS += -O3
else
  MK_CFLAGS    += -O0 -g
  MK_CXXFLAGS  += -O0 -g
  MK_LDFLAGS   += -g
  MK_NVCCFLAGS += -O0 -g

  ifeq ($(UNAME_S),Linux)
    MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
  endif
endif
# Sanitizers: each switch adds the same flags to the C, C++ and link steps.
ifdef WHISPER_SANITIZE_THREAD
  MK_CFLAGS   += -fsanitize=thread -g
  MK_CXXFLAGS += -fsanitize=thread -g
  MK_LDFLAGS  += -fsanitize=thread -g
endif

ifdef WHISPER_SANITIZE_ADDRESS
  MK_CFLAGS   += -fsanitize=address -fno-omit-frame-pointer -g
  MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
  MK_LDFLAGS  += -fsanitize=address -fno-omit-frame-pointer -g
endif

ifdef WHISPER_SANITIZE_UNDEFINED
  MK_CFLAGS   += -fsanitize=undefined -g
  MK_CXXFLAGS += -fsanitize=undefined -g
  MK_LDFLAGS  += -fsanitize=undefined -g
endif
# Server verbosity level, passed through as a preprocessor definition.
ifdef WHISPER_SERVER_VERBOSE
  MK_CPPFLAGS += -DSERVER_VERBOSE=$(WHISPER_SERVER_VERBOSE)
endif

# OpenSSL support for cpp-httplib; links libssl and libcrypto.
ifdef WHISPER_SERVER_SSL
  MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
  MK_LDFLAGS  += -lssl -lcrypto
endif

ifdef WHISPER_DISABLE_LOGS
  MK_CPPFLAGS += -DLOG_DISABLE_LOGS
endif # WHISPER_DISABLE_LOGS
# warnings
# Flags shared by the C and the C++ build.
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

# C-only additions
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith \
             -Wmissing-prototypes -Werror=implicit-int \
             -Werror=implicit-function-declaration

# C++-only additions
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn

# WHISPER_FATAL_WARNINGS=1 turns every warning into an error.
ifeq ($(WHISPER_FATAL_WARNINGS),1)
  MK_CFLAGS   += -Werror
  MK_CXXFLAGS += -Werror
endif
# this version of Apple ld64 is buggy
# (detected by looking for "dyld-1015.7" in the linker's verbose output)
ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
  MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
endif
2023-06-28 21:34:50 +02:00
2022-10-05 19:41:35 +02:00
# OS specific
# TODO: support Windows
# -pthread is added on the systems listed in the filter below.
ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
  MK_CFLAGS   += -pthread
  MK_CXXFLAGS += -pthread
endif
2022-10-05 19:41:35 +02:00
2024-01-15 14:48:13 +01:00
# detect Windows (uname -s contains "_NT")
ifneq ($(findstring _NT,$(UNAME_S)),)
  _WIN32 := 1
endif

ifeq ($(_WIN32),1)
  # Dynamic Shared Object extension
  DSO_EXT := .dll
  # Windows Sockets 2 (Winsock) for network-capable apps
  LWINSOCK2 := -lws2_32
else
  # library name prefix
  LIB_PRE := lib
  # Dynamic Shared Object extension
  DSO_EXT := .so
endif
2024-06-26 18:34:09 +02:00
# gprof instrumentation for the C and C++ compile steps.
ifdef WHISPER_GPROF
  MK_CFLAGS   += -pg
  MK_CXXFLAGS += -pg
endif
2022-10-05 19:41:35 +02:00
# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue
#
# When RISCV is set, only the fixed RISC-V flags at the bottom are used;
# otherwise the flags are chosen from UNAME_M and the compiler's target triple.

ifndef RISCV

ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
  # Use all CPU extensions that are available:
  MK_CFLAGS     += -march=native -mtune=native
  HOST_CXXFLAGS += -march=native -mtune=native

  # To use AVX only:
  #MK_CFLAGS   += -mfma -mf16c -mavx
  #MK_CXXFLAGS += -mfma -mf16c -mavx

  # To use SSSE3 only (note: SSSE3, not SSE3):
  #MK_CFLAGS   += -mssse3
  #MK_CXXFLAGS += -mssse3
endif

ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
  # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
  # https://github.com/ggerganov/llama.cpp/issues/2922
  MK_CFLAGS   += -Xassembler -muse-unaligned-vector-move
  MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move

  # Target Windows 8 for PrefetchVirtualMemory
  MK_CPPFLAGS += -D_WIN32_WINNT=0x602
endif

ifneq ($(filter aarch64%,$(UNAME_M)),)
  # Apple M1, M2, etc.
  # Raspberry Pi 3, 4, Zero 2 (64-bit)
  # Nvidia Jetson
  MK_CFLAGS   += -mcpu=native
  MK_CXXFLAGS += -mcpu=native

  # Simply expanded on purpose: `ifdef` looks at the unexpanded value, so a
  # recursive `= $(shell ...)` would always count as defined. stderr is
  # dropped so that hosts without jetson_release stay quiet.
  JETSON_RELEASE_INFO := $(shell jetson_release 2>/dev/null)
  ifdef JETSON_RELEASE_INFO
    ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
      JETSON_EOL_MODULE_DETECT = 1
      CC  = aarch64-unknown-linux-gnu-gcc
      # Was spelled `cxx`, which make treats as an unrelated variable.
      CXX = aarch64-unknown-linux-gnu-g++
    endif
  endif
endif

ifneq ($(filter armv6%,$(UNAME_M)),)
  # Raspberry Pi 1, Zero
  MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
  MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter armv7%,$(UNAME_M)),)
  # Raspberry Pi 2
  MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
  MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
  # Raspberry Pi 3, 4, Zero 2 (32-bit)
  MK_CFLAGS   += -mfp16-format=ieee -mno-unaligned-access
  MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter ppc64%,$(UNAME_M)),)
  # Tune for POWER9 when /proc/cpuinfo mentions it.
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
  ifneq (,$(findstring POWER9,$(POWER9_M)))
    MK_CFLAGS   += -mcpu=power9
    MK_CXXFLAGS += -mcpu=power9
  endif
endif

ifneq ($(filter ppc64le%,$(UNAME_M)),)
  MK_CFLAGS   += -mcpu=powerpc64le
  MK_CXXFLAGS += -mcpu=powerpc64le
  CUDA_POWER_ARCH = 1
endif

ifneq ($(filter loongarch64%,$(UNAME_M)),)
  MK_CFLAGS   += -mlasx
  MK_CXXFLAGS += -mlasx
endif

else # RISCV

  MK_CFLAGS   += -march=rv64gcv -mabi=lp64d
  MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d

endif # RISCV
2023-04-29 09:55:24 +02:00
2024-06-26 18:34:09 +02:00
ifndef GGML_NO_ACCELERATE
  # Mac OS - include Accelerate framework.
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
  ifeq ($(UNAME_S),Darwin)
    MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
    MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
    MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
    MK_LDFLAGS  += -framework Accelerate
    OBJ_GGML    += ggml/src/ggml-blas.o
  endif
endif # GGML_NO_ACCELERATE

# OpenMP is on unless GGML_NO_OPENMP is set.
ifndef GGML_NO_OPENMP
  MK_CPPFLAGS += -DGGML_USE_OPENMP
  MK_CFLAGS   += -fopenmp
  MK_CXXFLAGS += -fopenmp
endif # GGML_NO_OPENMP
# OpenBLAS: compiler and linker flags are taken from pkg-config.
ifdef GGML_OPENBLAS
  MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
  MK_CFLAGS   += $(shell pkg-config --cflags-only-other openblas)
  MK_LDFLAGS  += $(shell pkg-config --libs openblas)
  OBJ_GGML    += ggml/src/ggml-blas.o
endif # GGML_OPENBLAS

# Same as above for the openblas64 pkg-config package.
ifdef GGML_OPENBLAS64
  MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
  MK_CFLAGS   += $(shell pkg-config --cflags-only-other openblas64)
  MK_LDFLAGS  += $(shell pkg-config --libs openblas64)
  OBJ_GGML    += ggml/src/ggml-blas.o
endif # GGML_OPENBLAS64

# BLIS: fixed include and library locations.
ifdef GGML_BLIS
  MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
  MK_LDFLAGS  += -lblis -L/usr/local/lib
  OBJ_GGML    += ggml/src/ggml-blas.o
endif # GGML_BLIS

ifdef GGML_RPC
  MK_CPPFLAGS += -DGGML_USE_RPC
  OBJ_GGML    += ggml/src/ggml-rpc.o
endif # GGML_RPC
# Objects for the CUDA template instances. The fattn-wmma and mmq instances
# are always listed; of the fattn-vec instances only the q4_0, q8_0 and f16
# pairs are listed unless GGML_CUDA_FA_ALL_QUANTS is set.
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard \
    ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu \
    ggml/src/ggml-cuda/template-instances/mmq*.cu))

ifndef GGML_CUDA_FA_ALL_QUANTS
  OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard \
      ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu \
      ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu \
      ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
else
  OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
endif # GGML_CUDA_FA_ALL_QUANTS
# CUDA backend: toolkit location, preprocessor/linker/nvcc flags, the list of
# CUDA objects and the rules that compile them.
ifdef GGML_CUDA
  # Default CUDA_PATH: /opt/cuda when that directory exists, else /usr/local/cuda.
  ifneq ($(wildcard /opt/cuda),)
    CUDA_PATH ?= /opt/cuda
  else
    CUDA_PATH ?= /usr/local/cuda
  endif

  MK_CPPFLAGS  += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
  #MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
  MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
  MK_NVCCFLAGS += -use_fast_math

  OBJ_GGML += ggml/src/ggml-cuda.o
  OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
  OBJ_GGML += $(OBJ_CUDA_TMPL)

  #OBJ_WHISPER += src/whisper-mel-cuda.o

  ifdef WHISPER_FATAL_WARNINGS
    MK_NVCCFLAGS += -Werror all-warnings
  endif # WHISPER_FATAL_WARNINGS

  ifndef JETSON_EOL_MODULE_DETECT
    MK_NVCCFLAGS += --forward-unknown-to-host-compiler
  endif # JETSON_EOL_MODULE_DETECT

  ifdef WHISPER_DEBUG
    MK_NVCCFLAGS += -lineinfo
  endif # WHISPER_DEBUG

  ifdef GGML_CUDA_DEBUG
    MK_NVCCFLAGS += --device-debug
  endif # GGML_CUDA_DEBUG

  # nvcc binary: user-provided GGML_CUDA_NVCC or plain `nvcc`, behind ccache.
  ifdef GGML_CUDA_NVCC
    NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
  else
    NVCC = $(CCACHE) nvcc
  endif # GGML_CUDA_NVCC

  # Target architecture: explicit CUDA_DOCKER_ARCH, otherwise native unless
  # CUDA_POWER_ARCH is set.
  ifdef CUDA_DOCKER_ARCH
    MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
  else ifndef CUDA_POWER_ARCH
    MK_NVCCFLAGS += -arch=native
  endif # CUDA_DOCKER_ARCH

  ifdef GGML_CUDA_FORCE_DMMV
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # GGML_CUDA_FORCE_DMMV

  ifdef GGML_CUDA_FORCE_MMQ
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
  endif # GGML_CUDA_FORCE_MMQ

  ifdef GGML_CUDA_DMMV_X
    MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
  endif # GGML_CUDA_DMMV_X

  ifdef GGML_CUDA_MMV_Y
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
  else ifdef GGML_CUDA_DMMV_Y
    # for backwards compatibility
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_DMMV_Y)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
  endif # GGML_CUDA_MMV_Y

  ifdef GGML_CUDA_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
  endif # GGML_CUDA_F16

  # GGML_CUDA_DMMV_F16 adds the same definition as GGML_CUDA_F16.
  ifdef GGML_CUDA_DMMV_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
  endif # GGML_CUDA_DMMV_F16

  ifdef GGML_CUDA_KQUANTS_ITER
    MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
  else
    MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
  endif

  ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
  endif # GGML_CUDA_PEER_MAX_BATCH_SIZE

  ifdef GGML_CUDA_NO_PEER_COPY
    MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # GGML_CUDA_NO_PEER_COPY

  ifdef GGML_CUDA_CCBIN
    MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
  endif # GGML_CUDA_CCBIN

  ifdef GGML_CUDA_FA_ALL_QUANTS
    MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
  endif # GGML_CUDA_FA_ALL_QUANTS

# Compile command shared by the CUDA rules below. The variant used when
# JETSON_EOL_MODULE_DETECT is set spells out a fixed set of flags and
# include paths in front of NVCCFLAGS / CPPFLAGS.
ifdef JETSON_EOL_MODULE_DETECT
define NVCC_COMPILE
	$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef
else
define NVCC_COMPILE
	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef
endif # JETSON_EOL_MODULE_DETECT

# Any .cu file below ggml/src/ggml-cuda/
ggml/src/ggml-cuda/%.o: ggml/src/ggml-cuda/%.cu \
    ggml/include/ggml.h \
    ggml/src/ggml-common.h \
    ggml/src/ggml-cuda/common.cuh
	$(NVCC_COMPILE)

# The backend entry point; rebuilt when any .cuh below ggml/src/ggml-cuda changes.
ggml/src/ggml-cuda.o: ggml/src/ggml-cuda.cu \
    ggml/include/ggml.h \
    ggml/include/ggml-backend.h \
    ggml/include/ggml-cuda.h \
    ggml/src/ggml-backend-impl.h \
    ggml/src/ggml-common.h \
    $(wildcard ggml/src/ggml-cuda/*.cuh)
	$(NVCC_COMPILE)

#src/whisper-mel-cuda.o: src/whisper-mel-cuda.cu src/whisper-mel-cuda.hpp
#	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@

endif # GGML_CUDA
# Vulkan backend: preprocessor switches, libvulkan and the backend object.
ifdef GGML_VULKAN
  MK_CPPFLAGS += -DGGML_USE_VULKAN
  MK_LDFLAGS  += -lvulkan
  OBJ_GGML    += ggml/src/ggml-vulkan.o

  # Optional switches, each forwarded as a definition of the same name.
  ifdef GGML_VULKAN_CHECK_RESULTS
    MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
  endif

  ifdef GGML_VULKAN_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
  endif

  ifdef GGML_VULKAN_MEMORY_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
  endif

  ifdef GGML_VULKAN_VALIDATE
    MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
  endif

  ifdef GGML_VULKAN_RUN_TESTS
    MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
  endif

ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

endif # GGML_VULKAN
# HIP / ROCm backend. The CUDA sources are compiled with hipcc (`-x hip`).
ifdef GGML_HIPBLAS
  # ROCm location and GPU targets: /opt/rocm when it exists, otherwise /usr
  # with amdgpu-arch looked up through `which`.
  ifneq ($(wildcard /opt/rocm),)
    ROCM_PATH      ?= /opt/rocm
    AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
  else
    ROCM_PATH      ?= /usr
    AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
  endif

  # Defaults for the tunables forwarded through HIPFLAGS below.
  GGML_CUDA_DMMV_X       ?= 32
  GGML_CUDA_MMV_Y        ?= 1
  GGML_CUDA_KQUANTS_ITER ?= 2

  MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA

  ifdef GGML_HIP_UMA
    MK_CPPFLAGS += -DGGML_HIP_UMA
  endif # GGML_HIP_UMA

  MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
  MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
  MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

  HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc

  HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
  HIPFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
  HIPFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
  HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)

  ifdef GGML_CUDA_FORCE_DMMV
    HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # GGML_CUDA_FORCE_DMMV

  ifdef GGML_CUDA_NO_PEER_COPY
    HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # GGML_CUDA_NO_PEER_COPY

  OBJ_GGML += ggml/src/ggml-cuda.o
  OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
  OBJ_GGML += $(OBJ_CUDA_TMPL)

ggml/src/ggml-cuda.o: ggml/src/ggml-cuda.cu \
    ggml/include/ggml.h \
    ggml/include/ggml-backend.h \
    ggml/include/ggml-cuda.h \
    ggml/src/ggml-backend-impl.h \
    ggml/src/ggml-common.h \
    $(wildcard ggml/src/ggml-cuda/*.cuh)
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

ggml/src/ggml-cuda/%.o: ggml/src/ggml-cuda/%.cu \
    ggml/include/ggml.h \
    ggml/src/ggml-common.h \
    ggml/src/ggml-cuda/common.cuh
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

endif # GGML_HIPBLAS
# Metal backend: preprocessor switches, frameworks and objects.
ifdef GGML_METAL
  MK_CPPFLAGS += -DGGML_USE_METAL
  MK_LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
  OBJ_GGML    += ggml/src/ggml-metal.o

  ifdef GGML_METAL_NDEBUG
    MK_CPPFLAGS += -DGGML_METAL_NDEBUG
  endif

  # Embedding the Metal library adds one more object.
  ifdef GGML_METAL_EMBED_LIBRARY
    MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
    OBJ_GGML    += ggml/src/ggml-metal-embed.o
  endif
endif # GGML_METAL
2023-04-29 09:55:24 +02:00
2024-06-26 18:34:09 +02:00
# Core ML support for whisper: C++ definition(s) plus the frameworks to link.
ifdef WHISPER_COREML
  MK_CXXFLAGS += -DWHISPER_USE_COREML
  # Goes into MK_LDFLAGS like every other backend: a plain `LDFLAGS +=` is
  # ignored by make when LDFLAGS is passed on the command line, which would
  # drop the frameworks from the link. MK_LDFLAGS is merged into LDFLAGS
  # with `override` later in this Makefile.
  MK_LDFLAGS  += -framework Foundation -framework CoreML

  ifdef WHISPER_COREML_ALLOW_FALLBACK
    MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
  endif
endif
2023-04-29 09:55:24 +02:00
2024-06-26 18:34:09 +02:00
# ===

ifdef GGML_METAL

ggml/src/ggml-metal.o: ggml/src/ggml-metal.m ggml/include/ggml-metal.h ggml/include/ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ifdef GGML_METAL_EMBED_LIBRARY
# Embeds the Metal shader source into an object file:
#  1. sed writes ggml-metal-embed.metal, a copy of ggml-metal.metal in which
#     the `#include "ggml-common.h"` line is replaced by that header's content;
#  2. a temporary assembly file places that file between the symbols
#     _ggml_metallib_start and _ggml_metallib_end via .incbin;
#  3. the assembly file is assembled into the target and then removed.
ggml/src/ggml-metal-embed.o: ggml/src/ggml-metal.metal ggml/src/ggml-common.h
	@echo "Embedding Metal library"
	@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
	$(eval TEMP_ASSEMBLY=$(shell mktemp))
	@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
	@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
	@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
	@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
	@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
	@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
	@$(AS) $(TEMP_ASSEMBLY) -o $@
	@rm -f $(TEMP_ASSEMBLY)
endif

endif # GGML_METAL
2023-04-29 09:55:24 +02:00
2024-06-26 18:34:09 +02:00
# Core ML encoder wrappers. Both are compiled with ARC enabled and a fixed
# -O3 instead of the common CXXFLAGS.
ifdef WHISPER_COREML

src/coreml/whisper-encoder.o: src/coreml/whisper-encoder.mm src/coreml/whisper-encoder.h
	$(CXX) -O3 -I . -fobjc-arc -c $< -o $@

src/coreml/whisper-encoder-impl.o: src/coreml/whisper-encoder-impl.m src/coreml/whisper-encoder-impl.h
	$(CXX) -O3 -I . -fobjc-arc -c $< -o $@

OBJ_WHISPER += src/coreml/whisper-encoder.o src/coreml/whisper-encoder-impl.o

endif
2023-04-29 09:55:24 +02:00
2024-06-26 18:34:09 +02:00
# Object lists. The backend sections above may already have appended to
# OBJ_GGML and OBJ_WHISPER, hence `+=` throughout.
OBJ_GGML    += ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o
OBJ_WHISPER += src/whisper.o
OBJ_COMMON  += examples/common.o examples/common-ggml.o examples/grammar-parser.o
OBJ_SDL     += examples/common-sdl.o

OBJ_ALL = $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
# Library file names: shared variants use $(DSO_EXT), static variants
# (suffix _S) use .a; both carry the $(LIB_PRE) prefix.
LIB_GGML         = $(LIB_PRE)ggml$(DSO_EXT)
LIB_GGML_S       = $(LIB_PRE)ggml.a

LIB_WHISPER      = $(LIB_PRE)whisper$(DSO_EXT)
LIB_WHISPER_S    = $(LIB_PRE)whisper.a

LIB_COMMON       = $(LIB_PRE)common$(DSO_EXT)
LIB_COMMON_S     = $(LIB_PRE)common.a

LIB_COMMON_SDL   = $(LIB_PRE)common-sdl$(DSO_EXT)
LIB_COMMON_SDL_S = $(LIB_PRE)common-sdl.a

LIB_ALL   = $(LIB_GGML) $(LIB_WHISPER) $(LIB_COMMON) $(LIB_COMMON_SDL)
LIB_ALL_S = $(LIB_GGML_S) $(LIB_WHISPER_S) $(LIB_COMMON_S) $(LIB_COMMON_SDL_S)
# Flags for the host compiler; get-flags.mk reads GF_CC.
# NOTE(review): get-flags.mk is not visible here; it presumably provides the
# GF_CFLAGS / GF_CXXFLAGS used below - confirm.
GF_CC := $(CC)
include scripts/get-flags.mk

# combine build flags with cmdline overrides
# (`override` so the merge also takes effect for variables given on the
# command line; the Makefile's own flags come first)
override CPPFLAGS  := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS    := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS      := $(MK_CXXFLAGS) $(CXXFLAGS)
override CXXFLAGS  := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
override LDFLAGS   := $(MK_LDFLAGS) $(LDFLAGS)

# identify CUDA host compiler
ifdef GGML_CUDA
  GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
  include scripts/get-flags.mk
  CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
endif

# libcurl support
ifdef WHISPER_CURL
  override CXXFLAGS += -DWHISPER_USE_CURL
  override LDFLAGS  += -lcurl
endif
2023-01-02 12:35:26 +01:00
#
# Print build information
#

$(info I whisper.cpp build info: )
$(info I UNAME_S:   $(UNAME_S))
$(info I UNAME_P:   $(UNAME_P))
$(info I UNAME_M:   $(UNAME_M))
$(info I CFLAGS:    $(CFLAGS))
$(info I CXXFLAGS:  $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS:   $(LDFLAGS))
$(info I CC:        $(shell $(CC)   --version | head -n 1))
$(info I CXX:       $(shell $(CXX)  --version | head -n 1))

ifdef GGML_CUDA
  $(info I NVCC:      $(shell $(NVCC) --version | tail -n 1))

  # CUDA releases older than 11.7 get no default architecture from this
  # Makefile's checks: one of CUDA_DOCKER_ARCH / CUDA_POWER_ARCH must be set.
  CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
  ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
    ifndef CUDA_DOCKER_ARCH
      ifndef CUDA_POWER_ARCH
        $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
      endif # CUDA_POWER_ARCH
    endif # CUDA_DOCKER_ARCH
  endif # CUDA_VERSION < 11.7 (awk comparison)
endif # GGML_CUDA

$(info )
2024-06-26 18:34:09 +02:00
# Printed when DEPRECATE_WARNING has been set: lists the deprecated option
# names, one per line.
ifdef DEPRECATE_WARNING
  $(info !!! DEPRECATION WARNING !!!)
  $(info The following WHISPER_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
  $(foreach opt,CUDA METAL OPENMP RPC SYCL SYCL_F16 OPENBLAS OPENBLAS64 BLIS NO_LLAMAFILE NO_ACCELERATE NO_OPENMP NO_METAL,$(info - WHISPER_$(opt)))
  $(info )
endif
2022-10-05 19:41:35 +02:00
#
# Build libraries
#

# ggml
#
# Core C objects: each is compiled from its first prerequisite with the
# common C flags; the headers listed after it trigger a rebuild.

ggml/src/ggml.o: ggml/src/ggml.c ggml/include/ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-backend.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h \
    ggml/src/ggml-quants.h ggml/src/ggml-common.h
	$(CC) $(CFLAGS) -c $< -o $@

# C++ objects

ggml/src/ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

ifdef GGML_LLAMAFILE
ggml/src/sgemm.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # GGML_LLAMAFILE

ifdef GGML_RPC
ggml/src/ggml-rpc.o: ggml/src/ggml-rpc.cpp ggml/include/ggml-rpc.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # GGML_RPC

# Shared and static ggml libraries, both built from OBJ_GGML.

$(LIB_GGML): $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

$(LIB_GGML_S): $(OBJ_GGML)
	ar rcs $@ $^
# whisper

src/whisper.o: src/whisper.cpp \
    src/whisper-mel.hpp \
    include/whisper.h \
    ggml/include/ggml.h \
    ggml/include/ggml-alloc.h \
    ggml/include/ggml-backend.h \
    ggml/include/ggml-cuda.h \
    ggml/include/ggml-metal.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# The shared library is linked against the shared ggml library.
$(LIB_WHISPER): $(OBJ_WHISPER) $(LIB_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# The static archive holds the whisper objects only.
$(LIB_WHISPER_S): $(OBJ_WHISPER)
	ar rcs $@ $^
2023-04-15 12:21:27 +02:00
2024-06-26 18:34:09 +02:00
# common

examples/common.o: examples/common.cpp examples/common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

examples/common-ggml.o: examples/common-ggml.cpp examples/common-ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Shared and static variants of the helper library, both from OBJ_COMMON.

$(LIB_COMMON): $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

$(LIB_COMMON_S): $(OBJ_COMMON)
	ar rcs $@ $^
2024-02-11 15:41:41 +01:00
2024-06-26 18:34:09 +02:00
# common-sdl
#
# SDL2 compile and link flags come from sdl2-config. Both variables are
# recursive, so sdl2-config runs when they are expanded, not when they are
# defined.
CFLAGS_SDL  = $(shell sdl2-config --cflags)
LDFLAGS_SDL = $(shell sdl2-config --libs)

examples/common-sdl.o: examples/common-sdl.cpp examples/common-sdl.h
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@

$(LIB_COMMON_SDL): $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) $(LDFLAGS_SDL)

$(LIB_COMMON_SDL_S): $(OBJ_SDL)
	ar rcs $@ $^
2022-11-13 08:08:33 +01:00
2022-09-25 20:23:15 +02:00
# `clean` is a command, not a file: without .PHONY a file named "clean"
# would turn `make clean` into a no-op.
.PHONY: clean

# Remove build products: the default and test binaries, graph dumps (*.dot),
# libraries, the generated Metal source and every object file.
clean:
	rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS)
	rm -rvf src/*.o src/coreml/*.o tests/*.o examples/*.o
	rm -rvf *.a *.dll *.so
	rm -rvf ggml/*.a ggml/*.dll ggml/*.so
	rm -vrf ggml/src/*.o ggml/src/ggml-metal-embed.metal
	rm -vrf ggml/src/ggml-cuda/*.o ggml/src/ggml-cuda/template-instances/*.o
	find examples -type f -name "*.o" -delete
2022-09-25 20:23:15 +02:00
2022-10-05 19:41:35 +02:00
#
# Examples
#

# How the example rules below work:
#  - $< is the first prerequisite, i.e. the example's source file. It is
#    compiled to an object file in a step of its own so that the result can
#    be cached with ccache.
#  - For the link step the source file is filtered out of $^ (the list of all
#    prerequisites) and its object file is passed instead.

# $(call GET_OBJ_FILE,<files>): replaces .c, .cpp and .cu file endings with .o
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
# Main example binary. The source is compiled to an object first, then
# linked with the ggml, whisper and common objects (headers and the source
# itself are filtered out of the link line).
main: examples/main/main.cpp \
    $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo
	@echo '==== Run ./main -h for help. ===='
	@echo
# bench: compile the example's source to an object file, then link it with the
# remaining non-header prerequisites.
bench: examples/bench/bench.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
# quantize: compile the example's source to an object file, then link it with
# the remaining non-header prerequisites.
quantize: examples/quantize/quantize.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
# server: compile the example's source to an object file, then link it with the
# remaining non-header prerequisites; $(LWINSOCK2) is appended to the link line.
server: examples/server/server.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LWINSOCK2)
# command: SDL-based example. The source is compiled with $(CFLAGS_SDL) and the
# link step adds $(LDFLAGS_SDL).
command: examples/command/command.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
# stream: SDL-based example. The source is compiled with $(CFLAGS_SDL) and the
# link step adds $(LDFLAGS_SDL).
stream: examples/stream/stream.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
# lsp: SDL-based example. The source is compiled with $(CFLAGS_SDL) and the
# link step adds $(LDFLAGS_SDL).
lsp: examples/lsp/lsp.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
# talk: SDL-based example. Only the first prerequisite (talk.cpp) is compiled to
# an object up front; gpt-2.cpp stays in $^ and is passed to the link command as
# a source file.
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
	$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
# talk-llama: SDL-based example. Only the first prerequisite (talk-llama.cpp) is
# compiled to an object up front; the other .cpp prerequisites stay in $^ and
# are passed to the link command as source files.
talk-llama: examples/talk-llama/talk-llama.cpp \
	examples/talk-llama/llama.cpp \
	examples/talk-llama/unicode.cpp \
	examples/talk-llama/unicode-data.cpp \
	$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
2022-10-05 19:41:35 +02:00
2024-06-26 18:34:09 +02:00
#
# Tests
#
2022-11-25 18:06:56 +01:00
2024-06-26 18:34:09 +02:00
# Build every binary listed in TEST_TARGETS.
# Declared phony because `tests` is also the name of the directory that holds
# the test sources, so the target name must not be treated as that directory.
.PHONY: tests
tests: $(TEST_TARGETS)
examples : vim plugin and LSP server (#1144)
* Initial proof of concept Vim plugin
At present, this is likely only slightly better than feature parity with
the existing whisper.nvim
Known issues:
Trailing whitespace
Up to an existing length(5 seconds) of speech may be processed when
listening is enabled
CPU cycles are spent processing speech even when not listening.
Fixing these issues is likely dependent upon future efforts to create a
dedicated library instead of wrapping examples/stream
* Support $WHISPER_CPP_HOME environment variable
A minor misunderstanding of the whisper.nvim implementation resulted in
a plugin that was functional, but not a drop in replacement as it should
be now.
* Initial progress on LSP implementation
Libcall is nonviable because the library is immediately freed after a
call is made. Further investigation has shown Language Server Protocol as
a promising alternative that both simplifies the required logic on the
vimscript side and increases the ease with which plugins for other
editors could be made in the future. This is a very large undertaking
and my progress has slowed substantially.
Work is far from being in a usable state, but I wish to keep track of
major refactors for organizational purposes.
* Rewrite audio windowing of guided transcription
One of the defining goals of this venture is allowing consecutive
commands to be rattled off without the existing deadzones of the current
implementation.
* Add unguided_transcription. Cleanup.
The unguided transcription implementation heavily borrows from existing
example implementations and the guided_transcription logic.
A high level pass was done to check that method arguments are accurate
to what inputs are actually required.
A first attempt at cancellation support was added for record keeping,
but will be deleted in a future commit.
* Fix compilation.
Resolves a large number of compilation errors.
No testing has been done yet for execution errors.
Update Makefile and .gitignore
* Functional unguided_transcription
* Functional guided_transcription
Fix commandset_list being passed by value
Properly register the first token of a multitoken command
* Minor changes before time fix
I've apparently made an awfully major mistake in thinking that unix time
was in milliseconds and will be changing all timekeeping code to use
standardized methods.
In preparation for this is a number of minor bugfixes.
Output is manually flushed.
An echo method has been added.
registerCommandset now wraps the returned index
* Swap timekeeping to use std::chrono
* Add work in progress lsp backed whisper.vim plugin
Current progress blockers are
Adding modality awareness to the command processing
(specifically, motion prompting)
Improving the VAD to be a little more responsive
(testing start of activity)
* Reworked vim plugin command loop
* Fix change inside
Multiple bug fixes that, crucially, bring the plugin to the point where a
demonstration video is possible
Add better echo messaging so whisper_log isn't required
Add loading complete message as indicator when listening has started
Insert/append are actually included in command sets
Some more heavy handed corrections to prevent a double exit when leaving
insert mode
As a somewhat hacky fix, the very first space is removed when inserting.
This cleans up most use cases, but leaves me unsatisfied with the few
cases it would be desired.
* Forcibly set commandset_index to 0 after subinsert
Also remove unnecessary ! to use builtin vim command
* Fix upper
A minor scope mistake was causing upper'd inputs to be eaten.
This was fixed and echoing was slightly improved for clarity.
* Fix formatting
Corrects indentation to 4 spaces as project standard
Slightly better error support for malformed json input
* Remove obsolete vim plugin
* Add json.hpp library
The same library that is used for the llama.cpp server
* Minor cleanups
add lsp to the make clean directive.
remove a redundant params definition.
reorder whisper.vim logging for subtranscriptions
Corrections to unlets (variables of argument scope appear immutable)
* Fix indentation. Fallback for subTranscription
Indentation has been changed to 4 spaces.
Unit testing has been set up, I'm opting not to include it in the
repository for now.
It however, has revealed a bug in the state logic where a
subtranscription can be initiated without having a saved command
When this occurs, append is added as a fallback
* Move audio polling logic to a subfunction
While work on the improved vad will continue, It's grown to be a little
out of scope. Instead, a future commit will perform multiple detection
passes at substretches of audio when a backlog of audio exists.
To facilitate this, and prevent code duplication, the vad code has been
moved into a subfunction shared by both the unguided and guided
transcription functions.
* Test for voice over subchunks if backlog > 1s
As the existing VAD implementation only checks for a falling edge at the
end of an audio chunk. It fails to detect voice in cases where the
recorded voice is only at the beginning of the audio.
To ameliorate this, when the timestamp would cause analysis of audio
over a second in length, it is split into 1 second length subchunks
which are individually tested.
Results are promising, but there seems to be a remaining bug with
unguided transcription likely related to saving context
* Limit the maximum length of audio input.
This existing VAD implementation only detects falling edges, which
means any gap in the users speaking is processed for transcription.
This simply establishes a constant maximum length depending on the type
of transcription. Unguided gets a generous 10 seconds and guided, 2.
While quick testing showed that commands are generally around a half a
second to a second, limiting commands to an even second resulted in
extreme degradation of quality. (Seemingly always the same output for a
given commandset)
* Unguided timestamp tracking, cleanup
Unguided transcriptions were not set up to allow for passing of
timestamp data forward, but have been corrected.
No_context is now always set to false. While conceptually desirable for
the quality of guided transcription, It was seemingly responsible for
prior command inputs ghosting in unguided transcription.
Save and Run are now tracked by command number instead of command text.
While command_text was provided for convenience, I wish to keep command
index authoritative. This gives greater consistency and potentially
allows for end users to rename or even translate the spoken versions of
these commands
* By default, maintain mode.
Previously, mode was reset to 0 unless otherwise set.
In addition to causing some edge cases, this didn't mesh well with
the existing approach to visual mode.
With this change, initial tests indicate visual mode is functional.
* Add undo breaks before subtranscriptions
Subtranscriptions use undo as a hack to allow for partial responses to
be displayed. However, scripts don't cause an undo break mid execution
unless specifically instructed to. This meant that multiple
unguided transcriptions from a single session would cause a latter to
undo a former.
This is now fixed and undo should be reasonably usable as a command.
* Append instead of insert for new undo sequence
When entering and leaving insert mode with `i`, the cursor shifts one
column to the left. This is remedied by using append instead of insert
for setting these breaks in the undo sequence
`-` was also added to the pronunciation dictionary to be pronounced as
minus as it was causing a particularly high failure rate.
* Move undo sequence breaks to command execution
Previously, undo sequence breaks were triggered when there was a command
that caused a move to insert mode. This caused commands that changed
state (like delete or paste) to be bundled together into the last
command that caused text to be entered.
* Fix repeat. Add space, carrot, dollar commands
Repeat (.) wasn't being tracked properly just like undo and is being
manually tracked now.
While efforts have been made to properly handle spaces, it was
particularly finicky to add a single space when one is needed. A
special 'space' command has been added to insert a single space and move
the cursor after it.
Carrot and Dollar commands have been added for start of line and end of
line respectively. These are both simple to implement, and just a
matter of defining a pronunciation.
* Return error on duplicate in commandset
Not every command in the commandset tokenizes to a single token.
Because of this, it's possible that two commands could resolve to
the same single token after subsequent tokens are discarded.
This commit adds a simple check for duplicates when a commandset is
registered and returns an error if so.
Additional code will be required later on the vim side to actually
process this error.
* Add support for user-defined commands
This adds a user definable dictionary from spoken keys to strings or
funcrefs. All keys are added to the commandlist and when spoken, trigger
the corresponding function.
Like "save" and "run", these user commands are only available when the
command buffer is empty.
* Add readme, update cmake
* Add area commandset. Refactor spoken_dict
Area commands (inside word, around sentence...) have been given a
commandset as considered earlier.
Verbose definitions for spoken_dict entries now use dicts instead of
lists. This shortens the definition for most keys that require it and
scales better with the addition of further commandsets
* Add mark, jump. Fix change under visual.
Mark (m) and jump (') have been added.
When a visual selection was executed upon a command that initiated a
subtranscription (change) the area of the visual selection is not
properly tracked which causes the attempt to stream in partial response
to fail. This is solved by disabling partial transcriptions from being
streamed when a subtranscription is started while in visual mode.
* Accommodate ignorecase. Fix change.
From testing on older different versions of vim, the test for
distinguishing an 'R' replace all from an 'r' replace could fail if
ignorecase was set. The comparison has been changed to explicitly
require case matching
Change detection has been moved to the execution section as it was missing the
change+motion case.
* Support registers. Fix README typo
There's no logic to prevent doubled register entry, but the functional
result is equivalent to if the same key order was typed into vim.
A minor typo in the readme. I've mismemorized the mnemonic for 't' as 'to'
instead of 'till', but 'to' can't be used as it's a homophone with '2'.
While there was no mistake in the actual logic, it was misleading to use
'to' in the readme.
2023-08-27 20:35:06 +02:00
2024-06-26 18:34:09 +02:00
# Compile the C test source. whisper.h is listed as a prerequisite so the object
# is rebuilt when the header changes; only the first prerequisite, the .c file,
# is passed to the compiler.
tests/test-c.o: tests/test-c.c include/whisper.h
	$(CC) $(CFLAGS) -c $< -o $@
2022-12-09 19:38:10 +01:00
2024-06-26 18:34:09 +02:00
# tests/test-backend-ops: compile the test source to an object file, then link
# it with the $(OBJ_GGML) objects.
tests/test-backend-ops: tests/test-backend-ops.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
2022-10-05 19:41:35 +02:00
#
# Audio samples
#
2022-09-26 08:36:51 +02:00
# Fetch a handful of audio samples into ./samples with wget, then use ffmpeg to
# convert each one to 16 kHz mono 16-bit PCM WAV. The downloaded originals are
# deleted once converted; the Apollo 13 clip is trimmed to its first 30 seconds.
.PHONY: samples
samples:
	@echo "Downloading samples..."
	@mkdir -p samples
	@wget --quiet --show-progress -O samples/gb0.ogg https://upload.wikimedia.org/wikipedia/commons/2/22/George_W._Bush%27s_weekly_radio_address_%28November_1%2C_2008%29.oga
	@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
	@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
	@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
	@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
	@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
	@echo "Converting to 16-bit WAV ..."
	@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
	@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
	@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
	@rm samples/*.ogg
	@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
	@rm samples/mm1.wav
	@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
	@rm samples/a13.mp3
	@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
	@rm samples/diffusion2023-07-03.flac
2022-09-25 20:23:15 +02:00
2022-10-05 19:41:35 +02:00
#
# Models
#
2022-09-25 20:23:15 +02:00
2022-09-26 08:36:51 +02:00
# Each model target downloads the named model (the download script is invoked
# on every run) and then runs it on all samples in the folder "./samples".
# The names are commands rather than files, so they are declared phony.
.PHONY: tiny.en tiny base.en base small.en small medium.en medium
.PHONY: large-v1 large-v2 large-v3
2022-09-25 20:23:15 +02:00
2023-11-15 18:42:25 +01:00
# For the requested model name ($@): run the download script, print a banner,
# then run ./main with models/ggml-$@.bin on every samples/*.wav file.
# Requires the `main` binary, hence the prerequisite.
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
	bash ./models/download-ggml-model.sh $@
	@echo ""
	@echo "==============================================="
	@echo " Running $@ on all samples in ./samples ... "
	@echo "==============================================="
	@echo ""
	@for f in samples/*.wav; do \
		echo "----------------------------------------------" ; \
		echo " [+] Running $@ on $$f ... (run 'ffplay $$f' to listen) " ; \
		echo "----------------------------------------------" ; \
		echo "" ; \
		./main -m models/ggml-$@.bin -f $$f ; \
		echo "" ; \
	done