build : add WHISPER_COREML_ALLOW_FALLBACK to make / CMake (#812)

2025-08-24 23:55:27 +02:00 · 2023-04-29 10:55:24 +03:00
parent 94a7cd2a07
commit 3efb81dec6
3 changed files with 93 additions and 76 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,7 @@ if (APPLE)
    option(WHISPER_NO_FMA                "whisper: disable FMA" OFF)

    option(WHISPER_COREML                "whisper: enable Core ML framework" OFF)
+    option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
 else()
    option(WHISPER_SUPPORT_OPENBLAS      "whisper: support for OpenBLAS" OFF)
 endif()
@@ -119,6 +120,10 @@ if (APPLE)
        else()
            message(WARNING "CoreML framework not found")
        endif()
+
+        if (WHISPER_COREML_ALLOW_FALLBACK) # define name must match the Makefile's -DWHISPER_COREML_ALLOW_FALLBACK
+            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_COREML_ALLOW_FALLBACK)
+        endif()
    endif()
 endif()

--- a/Makefile
+++ b/Makefile
@@ -123,6 +123,7 @@ endif
 ifeq ($(UNAME_M),amd64)
 	CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
+
 ifneq ($(filter ppc64%,$(UNAME_M)),)
 	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
 	ifneq (,$(findstring POWER9,$(POWER9_M)))
@@ -133,6 +134,7 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
 		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
 	endif
 endif
+
 ifndef WHISPER_NO_ACCELERATE
 	# Mac M1 - include Accelerate framework
 	ifeq ($(UNAME_S),Darwin)
@@ -140,26 +142,36 @@ ifndef WHISPER_NO_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
 endif
+
 ifdef WHISPER_COREML
 	CXXFLAGS += -DWHISPER_USE_COREML
 	LDFLAGS  += -framework Foundation -framework CoreML
+
+ifdef WHISPER_COREML_ALLOW_FALLBACK
+	CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
 endif
+endif
+
 ifdef WHISPER_OPENBLAS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
 	LDFLAGS += -lopenblas
 endif
+
 ifdef WHISPER_GPROF
 	CFLAGS   += -pg
 	CXXFLAGS += -pg
 endif
+
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 	CFLAGS += -mcpu=native
 	CXXFLAGS += -mcpu=native
 endif
+
 ifneq ($(filter armv6%,$(UNAME_M)),)
 	# 32-bit Raspberry Pi 1, 2, 3
 	CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
 endif
+
 ifneq ($(filter armv7%,$(UNAME_M)),)
 	# 32-bit ARM, for example on Armbian or possibly raspbian
 	CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
@@ -167,6 +179,7 @@ ifneq ($(filter armv7%,$(UNAME_M)),)
 	# 64-bit ARM, use these (TODO: auto-detect 64-bit)
 	# CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 endif
+
 ifneq ($(filter armv8%,$(UNAME_M)),)
 	# Raspberry Pi 4
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1393,8 +1393,7 @@ static bool whisper_encode_internal(
    const bool use_coreml = wstate.ctx_coreml != nullptr;
 #endif

-    if (!use_coreml)
-    {
+    if (!use_coreml) {
        // convolution + gelu
        {
            wstate.use_buf(ctx0, 1);
@@ -1504,7 +1503,7 @@ static bool whisper_encode_internal(

                wstate.use_buf(ctx0, 0);

-    #ifdef WHISPER_USE_FLASH_ATTN
+#ifdef WHISPER_USE_FLASH_ATTN
                struct ggml_tensor * Q =
                    ggml_permute(ctx0,
                            ggml_cpy(ctx0,
@@ -1529,7 +1528,7 @@ static bool whisper_encode_internal(
                            ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));

                struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
-    #else
+#else
                struct ggml_tensor * Q =
                    ggml_permute(ctx0,
                            ggml_cpy(ctx0,
@@ -1575,7 +1574,7 @@ static bool whisper_encode_internal(
                            );

                struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
-    #endif
+#endif
                struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);

                wstate.use_buf(ctx0, 1);
@@ -1625,13 +1624,13 @@ static bool whisper_encode_internal(
                            ggml_repeat(ctx0, layer.mlp_ln_b, cur));
                }

-    #ifdef WHISPER_USE_FLASH_FF
+#ifdef WHISPER_USE_FLASH_FF
                wstate.use_buf(ctx0, 0);

                cur = ggml_flash_ff(ctx0,
                        ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
                        layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
-    #else
+#else
                wstate.use_buf(ctx0, 0);

                // fully connected
@@ -1662,7 +1661,7 @@ static bool whisper_encode_internal(
                cur = ggml_add(ctx0,
                        ggml_repeat(ctx0, layer.mlp_1_b, cur),
                        cur);
-    #endif
+#endif
            }

            wstate.use_buf(ctx0, 3);