whisper : wip sched (not working yet)

whisper : migrating to ggml-backend (wip)
ios : add support for Swift Package Manager (#1370)
2025-08-09 18:05:52 +02:00 · 2023-11-09 19:07:54 +02:00 · 2023-11-09 15:43:26 +02:00 · 2023-11-07 23:53:31 +02:00 · 2023-11-07 16:15:48 +02:00 · 2023-11-07 16:08:46 +02:00
41 changed files with 790 additions and 710 deletions
--- a/.gitignore
+++ b/.gitignore
@ -18,6 +18,11 @@ build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/

+# SPM
+.build/
+.swiftpm
+*.metallib
+
 /main
 /stream
 /command
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)

-project(whisper.cpp VERSION 1.4.2)
+project(whisper.cpp VERSION 1.4.3)

 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
--- a/Makefile
+++ b/Makefile
@ -417,9 +417,10 @@ samples:
 .PHONY: medium.en
 .PHONY: medium
 .PHONY: large-v1
+.PHONY: large-v2
 .PHONY: large

-tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
+tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
 	bash ./models/download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
--- a/Package.swift
+++ b/Package.swift
@ -0,0 +1,77 @@
+// swift-tools-version:5.5
+
+import PackageDescription
+
+#if arch(arm) || arch(arm64)
+let platforms: [SupportedPlatform]? = [
+    .macOS(.v12),
+    .iOS(.v14),
+    .watchOS(.v4),
+    .tvOS(.v14)
+]
+let exclude: [String] = []
+let resources: [Resource] = [
+    .process("ggml-metal.metal")
+]
+let additionalSources: [String] = ["ggml-metal.m"]
+let additionalSettings: [CSetting] = [
+    .unsafeFlags(["-fno-objc-arc"]),
+    .define("GGML_USE_METAL")
+]
+#else
+let platforms: [SupportedPlatform]? = nil
+let exclude: [String] = ["ggml-metal.metal"]
+let resources: [Resource] = []
+let additionalSources: [String] = []
+let additionalSettings: [CSetting] = []
+#endif
+
+let package = Package(
+    name: "whisper",
+    platforms: platforms,
+    products: [
+        .library(name: "whisper", targets: ["whisper"]),
+    ],
+    targets: [
+        .target(
+            name: "whisper",
+            path: ".",
+            exclude: exclude + [
+               "bindings",
+               "cmake",
+               "coreml",
+               "examples",
+               "extra",
+               "models",
+               "samples",
+               "tests",
+               "CMakeLists.txt",
+               "ggml-cuda.cu",
+               "ggml-cuda.h",
+               "Makefile"
+            ],
+            sources: [
+                "ggml.c",
+                "whisper.cpp",
+                "ggml-alloc.c",
+                "ggml-backend.c",
+                "ggml-quants.c"
+            ] + additionalSources,
+            resources: resources,
+            publicHeadersPath: "spm-headers",
+            cSettings: [
+                .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+                .define("GGML_USE_ACCELERATE")
+                // NOTE: NEW_LAPACK will require iOS version 16.4+
+                // We should consider adding this in the future when we drop support for iOS 14
+                // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+                // .define("ACCELERATE_NEW_LAPACK"),
+                // .define("ACCELERATE_LAPACK_ILP64")
+            ] + additionalSettings,
+            linkerSettings: [
+                .linkedFramework("Accelerate")
+            ]
+        )
+    ],
+    cxxLanguageStandard: .cxx11
+)
--- a/README.md
+++ b/README.md
@ -6,7 +6,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)

-Beta: [v1.4.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.2) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Beta: [v1.4.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.3) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)

 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

@ -234,6 +234,7 @@ make small
 make medium.en
 make medium
 make large-v1
+make large-v2
 make large
 ```

@ -245,7 +246,7 @@ make large
 | base   | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
 | small  | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
 | medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| large  | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| large  | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |

 ## Quantization

--- a/bindings/go/examples/go-model-download/main.go
+++ b/bindings/go/examples/go-model-download/main.go
@ -24,7 +24,7 @@ const (

 var (
 	// The models which will be downloaded, if no model is specified as an argument
-	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
+	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
 )

 var (
--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@ -83,7 +83,6 @@ const (
 	SampleRate = C.WHISPER_SAMPLE_RATE                 // Expected sample rate, samples per second
 	SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
 	NumFFT     = C.WHISPER_N_FFT
-	NumMEL     = C.WHISPER_N_MEL
 	HopLength  = C.WHISPER_HOP_LENGTH
 	ChunkSize  = C.WHISPER_CHUNK_SIZE
 )
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.4.2",
+  "version": "1.4.3",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/examples/bench.wasm/emscripten.cpp
+++ b/examples/bench.wasm/emscripten.cpp
@ -23,7 +23,9 @@ void bench_main(size_t index) {

    fprintf(stderr, "%s: running benchmark with %d threads - please wait...\n", __func__, n_threads);

-    if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
+    const int n_mels = whisper_model_n_mels(ctx);
+
+    if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
        fprintf(stderr, "error: failed to set mel: %d\n", ret);
        return;
    }
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -73,7 +73,9 @@ int whisper_bench_full(const whisper_params & params) {
        return 2;
    }

-    if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
+    const int n_mels = whisper_model_n_mels(ctx);
+
+    if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
        fprintf(stderr, "error: failed to set mel: %d\n", ret);
        return 3;
    }
--- a/examples/livestream.sh
+++ b/examples/livestream.sh
@ -48,7 +48,7 @@ if [ -n "$3" ]; then
 fi

 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )

 # list available models
 function list_models {
--- a/examples/twitch.sh
+++ b/examples/twitch.sh
@ -21,7 +21,7 @@ help()
    echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
    echo "options:"
    echo "-s       Step in seconds (default is $step)."
-    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large' (default is '$model')."
+    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')."
    echo "-t       Number of threads to use."
    echo "-h       Print this help page."
    echo
--- a/examples/whisper.android/app/build.gradle
+++ b/examples/whisper.android/app/build.gradle
@ -18,9 +18,7 @@ android {
        vectorDrawables {
            useSupportLibrary true
        }
-        ndk {
-            abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
-        }
+
    }

    buildTypes {
@ -43,20 +41,10 @@ android {
    composeOptions {
        kotlinCompilerExtensionVersion '1.5.0'
    }
-    ndkVersion "25.2.9519653"
-    externalNativeBuild {
-        cmake {
-            path = file("src/main/jni/whisper/CMakeLists.txt")
-        }
-    }
-    packagingOptions {
-        resources {
-            excludes += '/META-INF/{AL2.0,LGPL2.1}'
-        }
-    }
 }

 dependencies {
+    implementation project(':lib')
    implementation 'androidx.activity:activity-compose:1.7.2'
    implementation 'androidx.compose.material:material-icons-core:1.5.0'
    implementation 'androidx.compose.material3:material3:1.1.1'
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
@ -15,7 +15,7 @@ import androidx.lifecycle.viewmodel.initializer
 import androidx.lifecycle.viewmodel.viewModelFactory
 import com.whispercppdemo.media.decodeWaveFile
 import com.whispercppdemo.recorder.Recorder
-import com.whispercppdemo.whisper.WhisperContext
+import com.whispercpp.whisper.WhisperContext
 import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.launch
 import kotlinx.coroutines.runBlocking
@ -35,7 +35,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
    private val modelsPath = File(application.filesDir, "models")
    private val samplesPath = File(application.filesDir, "samples")
    private var recorder: Recorder = Recorder()
-    private var whisperContext: WhisperContext? = null
+    private var whisperContext: com.whispercpp.whisper.WhisperContext? = null
    private var mediaPlayer: MediaPlayer? = null
    private var recordedFile: File? = null

@ -47,7 +47,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
    }

    private suspend fun printSystemInfo() {
-        printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()))
+        printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo()))
    }

    private suspend fun loadData() {
@ -78,7 +78,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
        printMessage("Loading model...\n")
        val models = application.assets.list("models/")
        if (models != null) {
-            whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
+            whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
            printMessage("Loaded model ${models[0]}.\n")
        }

--- a/examples/whisper.android/lib/.gitignore
+++ b/examples/whisper.android/lib/.gitignore
@ -0,0 +1 @@
+/build
--- a/examples/whisper.android/lib/build.gradle
+++ b/examples/whisper.android/lib/build.gradle
@ -0,0 +1,51 @@
+plugins {
+    id 'com.android.library'
+    id 'org.jetbrains.kotlin.android'
+}
+
+android {
+    namespace 'com.whispercpp'
+    compileSdk 34
+
+    defaultConfig {
+        minSdk 26
+        targetSdk 34
+        versionCode 1
+        versionName "1.0"
+
+        ndk {
+            abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
+        }
+    }
+
+    buildTypes {
+        release {
+            minifyEnabled false
+        }
+    }
+    compileOptions {
+        sourceCompatibility JavaVersion.VERSION_1_8
+        targetCompatibility JavaVersion.VERSION_1_8
+    }
+    kotlinOptions {
+        jvmTarget = '1.8'
+    }
+
+    ndkVersion "25.2.9519653"
+    externalNativeBuild {
+        cmake {
+            path = file("src/main/jni/whisper/CMakeLists.txt")
+        }
+    }
+    packagingOptions {
+        resources {
+            excludes += '/META-INF/{AL2.0,LGPL2.1}'
+        }
+    }
+}
+
+dependencies {
+    implementation 'androidx.core:core-ktx:1.9.0'
+    implementation 'androidx.appcompat:appcompat:1.6.1'
+    implementation 'com.google.android.material:material:1.8.0'
+}
--- a/examples/whisper.android/lib/src/main/AndroidManifest.xml
+++ b/examples/whisper.android/lib/src/main/AndroidManifest.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+</manifest>
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
@ -1,4 +1,4 @@
-package com.whispercppdemo.whisper
+package com.whispercpp.whisper

 import android.content.res.AssetManager
 import android.os.Build
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/WhisperCpuConfig.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/WhisperCpuConfig.kt
@ -1,4 +1,4 @@
-package com.whispercppdemo.whisper
+package com.whispercpp.whisper

 import android.util.Log
 import java.io.BufferedReader
--- a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
+++ b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
--- a/examples/whisper.android/lib/src/main/jni/whisper/jni.c
+++ b/examples/whisper.android/lib/src/main/jni/whisper/jni.c
@ -131,7 +131,7 @@ static struct whisper_context *whisper_init_from_asset(
 }

 JNIEXPORT jlong JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
        JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
    UNUSED(thiz);
    struct whisper_context *context = NULL;
@ -142,7 +142,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
 }

 JNIEXPORT jlong JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext(
        JNIEnv *env, jobject thiz, jstring model_path_str) {
    UNUSED(thiz);
    struct whisper_context *context = NULL;
@ -153,7 +153,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
 }

 JNIEXPORT void JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
        JNIEnv *env, jobject thiz, jlong context_ptr) {
    UNUSED(env);
    UNUSED(thiz);
@ -162,7 +162,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
 }

 JNIEXPORT void JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
        JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
@ -194,7 +194,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
 }

 JNIEXPORT jint JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
        JNIEnv *env, jobject thiz, jlong context_ptr) {
    UNUSED(env);
    UNUSED(thiz);
@ -203,7 +203,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
 }

 JNIEXPORT jstring JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
        JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
@ -213,7 +213,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
 }

 JNIEXPORT jstring JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
        JNIEnv *env, jobject thiz
 ) {
    UNUSED(thiz);
@ -223,7 +223,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
 }

 JNIEXPORT jstring JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
                                                                      jint n_threads) {
    UNUSED(thiz);
    const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
@ -231,7 +231,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *en
 }

 JNIEXPORT jstring JNICALL
-Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
                                                                          jint n_threads) {
    UNUSED(thiz);
    const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
--- a/examples/whisper.android/settings.gradle
+++ b/examples/whisper.android/settings.gradle
@ -14,3 +14,4 @@ dependencyResolutionManagement {
 }
 rootProject.name = "WhisperCppDemo"
 include ':app'
+include ':lib'
--- a/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
+++ b/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
@ -1,4 +1,5 @@
 import Foundation
+import whisper

 enum WhisperError: Error {
    case couldNotInitializeContext
--- a/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h
+++ b/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h
@ -1,4 +0,0 @@
-//
-//  Use this file to import your target's public headers that you would like to expose to Swift.
-//
-#import "whisper.h"
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
@ -15,16 +15,9 @@
 		0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
 		0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
 		0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
-		0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
-		0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
-		0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
 		0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
 		0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
-		18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
-		18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
-		18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
-		7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
-		7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */; };
+		E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
 /* End PBXBuildFile section */

 /* Begin PBXFileReference section */
@ -38,25 +31,9 @@
 		0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
 		0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
 		0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
-		0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
-		0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "WhisperCppDemo-Bridging-Header.h"; sourceTree = "<group>"; };
-		0AAC5DC729539EB0003032C3 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whisper.cpp; sourceTree = "<group>"; };
-		0AAC5DC829539EB0003032C3 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whisper.h; sourceTree = "<group>"; };
-		0AAC5DC929539EB0003032C3 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ggml.c; sourceTree = "<group>"; };
-		0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ggml.h; sourceTree = "<group>"; };
 		0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
 		0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
-		18ABE14C2AF555FA0044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-backend.c"; sourceTree = "<group>"; };
-		18ABE14D2AF555FA0044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend.h"; sourceTree = "<group>"; };
-		18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend-impl.h"; sourceTree = "<group>"; };
-		18ABE14F2AF555FA0044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-quants.h"; sourceTree = "<group>"; };
-		18ABE1502AF555FA0044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-impl.h"; sourceTree = "<group>"; };
-		18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
-		18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
-		18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
-		7FCB081E2ACFA04400AF3530 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-metal.h"; sourceTree = "<group>"; };
-		7FCB08252ACFA3A400AF3530 /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "ggml-metal.m"; sourceTree = "<group>"; };
-		7FCB08272ACFA48500AF3530 /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = "ggml-metal.metal"; sourceTree = "<group>"; };
+		E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
 /* End PBXFileReference section */

 /* Begin PBXFrameworksBuildPhase section */
@ -64,6 +41,7 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@ -99,11 +77,12 @@
 		0AAC5D8E29539CCF003032C3 = {
 			isa = PBXGroup;
 			children = (
+				E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
 				0A8E48FF2954B3F100704C1B /* README.md */,
-				0AAC5DC529539E89003032C3 /* whisper.cpp */,
 				0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
 				0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
 				0AAC5D9829539CCF003032C3 /* Products */,
+				E3F92DC32AFA8E3800A6A9D4 /* Frameworks */,
 			);
 			sourceTree = "<group>";
 		};
@ -128,42 +107,9 @@
 			path = whisper.swiftui.demo;
 			sourceTree = "<group>";
 		};
-		0AAC5DA129539CD0003032C3 /* Preview Content */ = {
-			isa = PBXGroup;
-			children = (
-				0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */,
-			);
-			name = "Preview Content";
-			path = "../Preview Content";
-			sourceTree = "<group>";
-		};
-		0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
-			isa = PBXGroup;
-			children = (
-				7FCB08272ACFA48500AF3530 /* ggml-metal.metal */,
-				7FCB081E2ACFA04400AF3530 /* ggml-metal.h */,
-				7FCB08252ACFA3A400AF3530 /* ggml-metal.m */,
-				18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
-				18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
-				18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
-				18ABE1502AF555FA0044A204 /* ggml-impl.h */,
-				18ABE1512AF555FA0044A204 /* ggml-quants.c */,
-				18ABE14F2AF555FA0044A204 /* ggml-quants.h */,
-				18AED47F2AB21F2B009D854F /* ggml-alloc.c */,
-				18AED4802AB21F2B009D854F /* ggml-alloc.h */,
-				0AAC5DC929539EB0003032C3 /* ggml.c */,
-				0AAC5DCA29539EB0003032C3 /* ggml.h */,
-				0AAC5DC729539EB0003032C3 /* whisper.cpp */,
-				0AAC5DC829539EB0003032C3 /* whisper.h */,
-			);
-			name = whisper.cpp;
-			path = ../..;
-			sourceTree = "<group>";
-		};
 		0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
 			isa = PBXGroup;
 			children = (
-				0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */,
 				0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
 			);
 			path = whisper.cpp.swift;
@ -182,11 +128,17 @@
 			children = (
 				0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
 				0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
-				0AAC5DA129539CD0003032C3 /* Preview Content */,
 			);
 			path = "Supporting files";
 			sourceTree = "<group>";
 		};
+		E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
 /* End PBXGroup section */

 /* Begin PBXNativeTarget section */
@ -203,6 +155,9 @@
 			dependencies = (
 			);
 			name = whisper.swiftui;
+			packageProductDependencies = (
+				E3F92DC42AFA8E3800A6A9D4 /* whisper */,
+			);
 			productName = WhisperCppDemo;
 			productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
 			productType = "com.apple.product-type.application";
@ -247,7 +202,6 @@
 			buildActionMask = 2147483647;
 			files = (
 				0AA751482953AC2E001EE061 /* samples in Resources */,
-				0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */,
 				0A8E49002954B3F100704C1B /* README.md in Resources */,
 				0AA751492953AC2E001EE061 /* models in Resources */,
 				0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
@ -263,17 +217,10 @@
 			files = (
 				0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
 				0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
-				0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
-				18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
 				0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
-				7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */,
 				0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
 				0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
-				0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
 				0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
-				7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */,
-				18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
-				18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@ -401,7 +348,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
-				DEVELOPMENT_TEAM = P8JZH34X63;
+				DEVELOPMENT_TEAM = "";
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_PREVIEWS = YES;
 				GENERATE_INFOPLIST_FILE = YES;
@ -425,7 +372,6 @@
 				SDKROOT = auto;
 				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
 				SWIFT_EMIT_LOC_STRINGS = YES;
-				SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 				SWIFT_VERSION = 5.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
@ -442,7 +388,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
-				DEVELOPMENT_TEAM = P8JZH34X63;
+				DEVELOPMENT_TEAM = "";
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_PREVIEWS = YES;
 				GENERATE_INFOPLIST_FILE = YES;
@ -471,7 +417,6 @@
 				SDKROOT = auto;
 				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
 				SWIFT_EMIT_LOC_STRINGS = YES;
-				SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
 				SWIFT_VERSION = 5.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
 			};
@ -499,6 +444,13 @@
 			defaultConfigurationName = Release;
 		};
 /* End XCConfigurationList section */
+
+/* Begin XCSwiftPackageProductDependency section */
+		E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
+			isa = XCSwiftPackageProductDependency;
+			productName = whisper;
+		};
+/* End XCSwiftPackageProductDependency section */
 	};
 	rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
 }
--- a/extra/convert-all.sh
+++ b/extra/convert-all.sh
@ -1,6 +1,6 @@
 #!/bin/bash

-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )

 for model in "${models[@]}"; do
    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
--- a/ggml.c
+++ b/ggml.c
@ -143,12 +143,6 @@ void ggml_print_backtrace(void) {
 }
 #endif

-#undef MIN
-#undef MAX
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
 /*#define GGML_PERF*/
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
@ -277,6 +271,12 @@ inline static void * ggml_aligned_malloc(size_t size) {
 // floating point type used to accumulate sums
 typedef double ggml_float;

+#undef MIN
+#undef MAX
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
 //
 // global data
 //
--- a/models/README.md
+++ b/models/README.md
@ -50,7 +50,8 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main
 | medium    | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
 | medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
 | large-v1  | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
-| large     | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| large-v2  | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| large     | 2.9 GB | ~4.7 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |

 ## Model files for testing purposes

--- a/models/convert-h5-to-coreml.py
+++ b/models/convert-h5-to-coreml.py
@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
 # Ported from models/convert-whisper-to-coreml.py
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
+    parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
    parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
    parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
    parser.add_argument("--quantize",     type=bool, help="quantize weights to F16", default=False)
    parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
    args = parser.parse_args()

-    if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
+    if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
        raise ValueError("Invalid model name")

    pt_target_path = f"models/hf-{args.model_name}.pt"
--- a/models/convert-pt-to-ggml.py
+++ b/models/convert-pt-to-ggml.py
@ -228,7 +228,7 @@ with np.load(dir_whisper / "whisper" / "assets" / "mel_filters.npz") as f:
 # for backwards compatibility, also check for older hf_transformers format tokenizer files
 # old format: dir_whisper/whisper/assets/[multilingual/gpt2]/vocab.json
 # new format: dir_whisper/whisper/assets/[multilingual/gpt2].tiktoken
-multilingual = hparams["n_vocab"] == 51865
+multilingual = hparams["n_vocab"] >= 51865
 tokenizer = dir_whisper / "whisper" / "assets" / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
 tokenizer_type = "tiktoken"
 if not tokenizer.is_file():
--- a/models/convert-whisper-to-coreml.py
+++ b/models/convert-whisper-to-coreml.py
@ -194,7 +194,7 @@ class TextDecoderANE(TextDecoder):
        x = x.permute(0,2,3,1).squeeze(0)

        # ANE can only load tensors with dim size of at most 16,384 - whisper uses 51,864 (en) or 51,865 (multi-lang) tokens so we need to compute in chunks
-        if self.token_embedding.weight.shape[0] == 51865:
+        if self.token_embedding.weight.shape[0] >= 51865:
            # split in 11 chunks - 4715 each
            splits = self.token_embedding.weight.split(self.token_embedding.weight.shape[0]//11, dim=0)
            logits = torch.cat([torch.einsum('bid,jd->bij', x, split) for split in splits]).view(*x.shape[:2], -1)
@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False):

 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
+    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
    parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
    parser.add_argument("--quantize",     type=bool, help="quantize weights to F16", default=False)
    parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
    args = parser.parse_args()

-    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
        raise ValueError("Invalid model name")

    whisper = load_model(args.model).cpu()
--- a/models/convert-whisper-to-openvino.py
+++ b/models/convert-whisper-to-openvino.py
@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname):

 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
+    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
    args = parser.parse_args()

-    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
        raise ValueError("Invalid model name")

    whisper = load_model(args.model).cpu()
--- a/models/download-coreml-model.sh
+++ b/models/download-coreml-model.sh
@ -19,7 +19,7 @@ function get_script_path() {
 models_path="$(get_script_path)"

 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )

 # list available models
 function list_models {
--- a/models/download-ggml-model.cmd
+++ b/models/download-ggml-model.cmd
@ -8,7 +8,7 @@ popd
 set argc=0
 for %%x in (%*) do set /A argc+=1

-set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large
+set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large

 if %argc% neq 1 (
  echo.
@ -57,8 +57,8 @@ goto :eof
 :list_models
  echo.
  echo Available models:
-  (for %%a in (%models%) do ( 
-    echo %%a 
+  (for %%a in (%models%) do (
+    echo %%a
  ))
  echo.
  exit /b
--- a/models/download-ggml-model.sh
+++ b/models/download-ggml-model.sh
@ -41,6 +41,7 @@ models=(
    "medium-q5_0"
    "medium.en-q5_0"
    "large-v1"
+    "large-v2"
    "large"
    "large-q5_0"
 )
--- a/spm-headers/ggml.h
+++ b/spm-headers/ggml.h
@ -0,0 +1 @@
+../ggml.h
--- a/spm-headers/whisper.h
+++ b/spm-headers/whisper.h
@ -0,0 +1 @@
+../whisper.h
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@ -19,7 +19,7 @@
 cd `dirname $0`

 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )

 # list available models
 function list_models {
--- a/whisper.cpp
+++ b/whisper.cpp
--- a/whisper.h
+++ b/whisper.h
@ -1,6 +1,8 @@
 #ifndef WHISPER_H
 #define WHISPER_H

+#include "ggml.h"
+
 #include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
@ -29,7 +31,6 @@

 #define WHISPER_SAMPLE_RATE 16000
 #define WHISPER_N_FFT       400
-#define WHISPER_N_MEL       80
 #define WHISPER_HOP_LENGTH  160
 #define WHISPER_CHUNK_SIZE  30

@ -111,15 +112,15 @@ extern "C" {
    // Various functions for loading a ggml whisper model.
    // Allocate (almost) all memory needed for the model.
    // Return NULL on failure
-    WHISPER_API struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_file_with_params  (const char * path_model,              struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size,    struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_with_params            (struct whisper_model_loader * loader, struct whisper_context_params params);

    // These are the same as the above, but the internal state of the context is not allocated automatically
    // It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
-    WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state  (const char * path_model,              struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size,    struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_with_params_no_state            (struct whisper_model_loader * loader, struct whisper_context_params params);

    WHISPER_DEPRECATED(
        WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
@ -571,8 +572,7 @@ extern "C" {

    // Control logging output; default behavior is to print to stderr

-    typedef void (*whisper_log_callback)(const char * line);
-    WHISPER_API void whisper_set_log_callback(whisper_log_callback callback);
+    WHISPER_API void whisper_log_set(ggml_log_callback log_callback, void * user_data);

 #ifdef __cplusplus
 }
Author	SHA1	Message	Date
Georgi Gerganov	bf4110dbcf	whisper : wip sched (not working yet)	2023-11-09 19:07:54 +02:00
Georgi Gerganov	005b8ccbf0	whisper : migrating to ggml-backend (wip)	2023-11-09 15:43:26 +02:00
Sindre Sorhus	d03c60dd7f	ios : add support for Swift Package Manager (#1370) * Add support for Swift * Make it build in Xcode * Use the SPM package in the SwiftUI example app	2023-11-07 23:53:31 +02:00
Georgi Gerganov	6a5d195109	release : v1.4.3	2023-11-07 16:15:48 +02:00
Georgi Gerganov	0cbef75422	ggml : fix MIN / MAX macro re-definition	2023-11-07 16:08:46 +02:00
Georgi Gerganov	2cdfc4e025	whisper : add support for large v3 (#1444) * whisper : add support for large v3 * bench : fix build + fix go bindings * bench : fix n_mels * models : update readme	2023-11-07 15:30:18 +02:00
Tobrun	973111088b	android : decouple example into a library and app module (#1445)	2023-11-07 14:27:33 +02:00
Ben Nortier	11b503055e	whisper : reset ctx->t_start_us when calling whisper_reset_timings() (#1434 ) Co-authored-by: Ben Nortier <ben@bjnortier.com>	2023-11-07 11:04:32 +02:00