models : update readme

bench : fix n_mels
bench : fix build + fix go bindings
2025-07-04 00:11:12 +02:00 · 2023-11-07 13:53:01 +02:00 · 2023-11-07 13:45:56 +02:00 · 2023-11-07 13:20:02 +02:00 · 2023-11-07 11:58:39 +02:00
40 changed files with 1719 additions and 1127 deletions
--- a/.gitignore
+++ b/.gitignore
@ -8,7 +8,6 @@
 .DS_Store
 build/
 build-coreml/
 build-em/
 build-debug/
 build-release/
@ -19,11 +18,6 @@ build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/
 # SPM
 .build/
 .swiftpm
 *.metallib
 /main
 /stream
 /command
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)
-project(whisper.cpp VERSION 1.4.3)
+project(whisper.cpp VERSION 1.4.2)
 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
--- a/42
+++ b/42
@ -307,7 +307,7 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
 ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
 	$(CC)  $(CFLAGS)   -c $< -o $@
-WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o
+WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o
 whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@ -331,11 +331,11 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
 WHISPER_OBJ += ggml-metal.o
 endif
-libwhisper.a: $(WHISPER_OBJ)
+libwhisper.a: ggml.o $(WHISPER_OBJ)
-	$(AR) rcs libwhisper.a $(WHISPER_OBJ)
+	$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
-libwhisper.so: $(WHISPER_OBJ)
+libwhisper.so: ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
 clean:
 	rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
@ -349,30 +349,30 @@ CC_SDL=`sdl2-config --cflags --libs`
 SRC_COMMON     = examples/common.cpp examples/common-ggml.cpp
 SRC_COMMON_SDL = examples/common-sdl.cpp
-main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
+main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
 	./main -h
-bench: examples/bench/bench.cpp $(WHISPER_OBJ)
+bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp $(WHISPER_OBJ) -o bench $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
-quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
+quantize: examples/quantize/quantize.cpp ggml.o $(WHISPER_OBJ) $(SRC_COMMON)
-	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o quantize $(LDFLAGS)
-stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
-lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
-talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
-talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
 #
 # Audio samples
--- a/Package.swift
+++ b/Package.swift
@ -1,77 +0,0 @@
 // swift-tools-version:5.5
 import PackageDescription
 #if arch(arm) || arch(arm64)
 let platforms: [SupportedPlatform]? = [
    .macOS(.v12),
    .iOS(.v14),
    .watchOS(.v4),
    .tvOS(.v14)
 ]
 let exclude: [String] = []
 let resources: [Resource] = [
    .process("ggml-metal.metal")
 ]
 let additionalSources: [String] = ["ggml-metal.m"]
 let additionalSettings: [CSetting] = [
    .unsafeFlags(["-fno-objc-arc"]),
    .define("GGML_USE_METAL")
 ]
 #else
 let platforms: [SupportedPlatform]? = nil
 let exclude: [String] = ["ggml-metal.metal"]
 let resources: [Resource] = []
 let additionalSources: [String] = []
 let additionalSettings: [CSetting] = []
 #endif
 let package = Package(
    name: "whisper",
    platforms: platforms,
    products: [
        .library(name: "whisper", targets: ["whisper"]),
    ],
    targets: [
        .target(
            name: "whisper",
            path: ".",
            exclude: exclude + [
               "bindings",
               "cmake",
               "coreml",
               "examples",
               "extra",
               "models",
               "samples",
               "tests",
               "CMakeLists.txt",
               "ggml-cuda.cu",
               "ggml-cuda.h",
               "Makefile"
            ],
            sources: [
                "ggml.c",
                "whisper.cpp",
                "ggml-alloc.c",
                "ggml-backend.c",
                "ggml-quants.c"
            ] + additionalSources,
            resources: resources,
            publicHeadersPath: "spm-headers",
            cSettings: [
                .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
                .define("GGML_USE_ACCELERATE")
                 // NOTE: NEW_LAPACK will require iOS version 16.4+
                 // We should consider adding this in the future when we drop support for iOS 14
                 // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
                // .define("ACCELERATE_NEW_LAPACK"),
                // .define("ACCELERATE_LAPACK_ILP64")
            ] + additionalSettings,
            linkerSettings: [
                .linkedFramework("Accelerate")
            ]
        )
    ],
    cxxLanguageStandard: .cxx11
 )
--- a/README.md
+++ b/README.md
@ -6,7 +6,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
-Beta: [v1.4.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.3) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Beta: [v1.4.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.2) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.4.3",
+  "version": "1.4.2",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/coreml/whisper-encoder-impl.h
+++ b/coreml/whisper-encoder-impl.h
@ -123,7 +123,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
 /**
    Make a prediction using the convenience interface
-    @param logmel_data as 1 × n_mel × 3000 3-dimensional array of floats:
+    @param logmel_data as 1 × 80 × 3000 3-dimensional array of floats:
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
    @return the prediction as whisper_encoder_implOutput
 */
--- a/coreml/whisper-encoder.h
+++ b/coreml/whisper-encoder.h
@ -3,8 +3,6 @@
 // Code is derived from the work of Github user @wangchou
 // ref: https://github.com/wangchou/callCoreMLFromCpp
 #include <stdint.h>
 #if __cplusplus
 extern "C" {
 #endif
@ -16,8 +14,6 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx);
 void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                             int64_t   n_ctx,
                             int64_t   n_mel,
                               float * mel,
                               float * out);
--- a/coreml/whisper-encoder.mm
+++ b/coreml/whisper-encoder.mm
@ -48,15 +48,13 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx) {
 void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                             int64_t   n_ctx,
                             int64_t   n_mel,
                               float * mel,
                               float * out) {
    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
-                                           shape: @[@1, @(n_mel), @(n_ctx)]
+                                           shape: @[@1, @80, @3000]
                                        dataType: MLMultiArrayDataTypeFloat32
-                                         strides: @[@(n_ctx*n_mel), @(n_ctx), @1]
+                                         strides: @[@(240000), @(3000), @1]
                                     deallocator: nil
                                           error: nil
    ];
--- a/examples/common.h
+++ b/examples/common.h
@ -181,7 +181,7 @@ private:
    // It is assumed that PCM data is normalized to a range from -1 to 1
    bool write_audio(const float * data, size_t length) {
        for (size_t i = 0; i < length; ++i) {
-            const int16_t intSample = data[i] * 32767;
+            const auto intSample = static_cast<const int16_t>(data[i] * 32767);
            file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
            dataSize += sizeof(int16_t);
        }
--- a/examples/talk-llama/talk-llama.cpp
+++ b/examples/talk-llama/talk-llama.cpp
@ -248,7 +248,7 @@ int main(int argc, char ** argv) {
        return 1;
    }
-    if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
+    if (whisper_lang_id(params.language.c_str()) == -1) {
        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
        whisper_print_usage(argc, argv, params);
        exit(0);
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@ -121,13 +121,13 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
            return false;
        }
-        char word[129];
+        std::string word;
        for (int i = 0; i < n_vocab; i++) {
            uint32_t len;
            fin.read((char *) &len, sizeof(len));
-            word[len] = '\0';
+
-            fin.read((char *) word, len);
+            word.resize(len);
            fin.read((char *) word.data(), len);
            vocab.token_to_id[word] = i;
            vocab.id_to_token[i] = word;
--- a/examples/whisper.android/app/build.gradle
+++ b/examples/whisper.android/app/build.gradle
@ -18,7 +18,9 @@ android {
        vectorDrawables {
            useSupportLibrary true
        }
-
+        ndk {
            abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
        }
    }
    buildTypes {
@ -41,10 +43,20 @@ android {
    composeOptions {
        kotlinCompilerExtensionVersion '1.5.0'
    }
    ndkVersion "25.2.9519653"
    externalNativeBuild {
        cmake {
            path = file("src/main/jni/whisper/CMakeLists.txt")
        }
    }
    packagingOptions {
        resources {
            excludes += '/META-INF/{AL2.0,LGPL2.1}'
        }
    }
 }
 dependencies {
    implementation project(':lib')
    implementation 'androidx.activity:activity-compose:1.7.2'
    implementation 'androidx.compose.material:material-icons-core:1.5.0'
    implementation 'androidx.compose.material3:material3:1.1.1'
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
@ -15,7 +15,7 @@ import androidx.lifecycle.viewmodel.initializer
 import androidx.lifecycle.viewmodel.viewModelFactory
 import com.whispercppdemo.media.decodeWaveFile
 import com.whispercppdemo.recorder.Recorder
-import com.whispercpp.whisper.WhisperContext
+import com.whispercppdemo.whisper.WhisperContext
 import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.launch
 import kotlinx.coroutines.runBlocking
@ -35,7 +35,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
    private val modelsPath = File(application.filesDir, "models")
    private val samplesPath = File(application.filesDir, "samples")
    private var recorder: Recorder = Recorder()
-    private var whisperContext: com.whispercpp.whisper.WhisperContext? = null
+    private var whisperContext: WhisperContext? = null
    private var mediaPlayer: MediaPlayer? = null
    private var recordedFile: File? = null
@ -47,7 +47,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
    }
    private suspend fun printSystemInfo() {
-        printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo()))
+        printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()))
    }
    private suspend fun loadData() {
@ -78,7 +78,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
        printMessage("Loading model...\n")
        val models = application.assets.list("models/")
        if (models != null) {
-            whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
+            whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
            printMessage("Loaded model ${models[0]}.\n")
        }
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
@ -1,4 +1,4 @@
-package com.whispercpp.whisper
+package com.whispercppdemo.whisper
 import android.content.res.AssetManager
 import android.os.Build
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/WhisperCpuConfig.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/WhisperCpuConfig.kt
@ -1,4 +1,4 @@
-package com.whispercpp.whisper
+package com.whispercppdemo.whisper
 import android.util.Log
 import java.io.BufferedReader
--- a/examples/whisper.android/app/src/main/jni/whisper/CMakeLists.txt
+++ b/examples/whisper.android/app/src/main/jni/whisper/CMakeLists.txt
--- a/examples/whisper.android/app/src/main/jni/whisper/jni.c
+++ b/examples/whisper.android/app/src/main/jni/whisper/jni.c
@ -131,7 +131,7 @@ static struct whisper_context *whisper_init_from_asset(
 }
 JNIEXPORT jlong JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
        JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
    UNUSED(thiz);
    struct whisper_context *context = NULL;
@ -142,7 +142,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
 }
 JNIEXPORT jlong JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
        JNIEnv *env, jobject thiz, jstring model_path_str) {
    UNUSED(thiz);
    struct whisper_context *context = NULL;
@ -153,7 +153,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext(
 }
 JNIEXPORT void JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
        JNIEnv *env, jobject thiz, jlong context_ptr) {
    UNUSED(env);
    UNUSED(thiz);
@ -162,7 +162,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
 }
 JNIEXPORT void JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
        JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
@ -194,7 +194,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
 }
 JNIEXPORT jint JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
        JNIEnv *env, jobject thiz, jlong context_ptr) {
    UNUSED(env);
    UNUSED(thiz);
@ -203,7 +203,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
 }
 JNIEXPORT jstring JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
        JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
@ -213,7 +213,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
 }
 JNIEXPORT jstring JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
        JNIEnv *env, jobject thiz
 ) {
    UNUSED(thiz);
@ -223,7 +223,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
 }
 JNIEXPORT jstring JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
                                                                      jint n_threads) {
    UNUSED(thiz);
    const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
@ -231,7 +231,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, j
 }
 JNIEXPORT jstring JNICALL
-Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
                                                                          jint n_threads) {
    UNUSED(thiz);
    const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
--- a/examples/whisper.android/lib/.gitignore
+++ b/examples/whisper.android/lib/.gitignore
@ -1 +0,0 @@
 /build
--- a/examples/whisper.android/lib/build.gradle
+++ b/examples/whisper.android/lib/build.gradle
@ -1,51 +0,0 @@
 plugins {
    id 'com.android.library'
    id 'org.jetbrains.kotlin.android'
 }
 android {
    namespace 'com.whispercpp'
    compileSdk 34
    defaultConfig {
        minSdk 26
        targetSdk 34
        versionCode 1
        versionName "1.0"
        ndk {
            abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
        }
    }
    buildTypes {
        release {
            minifyEnabled false
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
    ndkVersion "25.2.9519653"
    externalNativeBuild {
        cmake {
            path = file("src/main/jni/whisper/CMakeLists.txt")
        }
    }
    packagingOptions {
        resources {
            excludes += '/META-INF/{AL2.0,LGPL2.1}'
        }
    }
 }
 dependencies {
    implementation 'androidx.core:core-ktx:1.9.0'
    implementation 'androidx.appcompat:appcompat:1.6.1'
    implementation 'com.google.android.material:material:1.8.0'
 }
--- a/examples/whisper.android/lib/src/main/AndroidManifest.xml
+++ b/examples/whisper.android/lib/src/main/AndroidManifest.xml
@ -1,4 +0,0 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android">
 </manifest>
--- a/examples/whisper.android/settings.gradle
+++ b/examples/whisper.android/settings.gradle
@ -14,4 +14,3 @@ dependencyResolutionManagement {
 }
 rootProject.name = "WhisperCppDemo"
 include ':app'
 include ':lib'
--- a/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
+++ b/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
@ -1,5 +1,4 @@
 import Foundation
 import whisper
 enum WhisperError: Error {
    case couldNotInitializeContext
--- a/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h
+++ b/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h
@ -0,0 +1,4 @@
 //
 //  Use this file to import your target's public headers that you would like to expose to Swift.
 //
 #import "whisper.h"
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
@ -15,9 +15,16 @@
 		0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
 		0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
 		0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
 		0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
 		0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
 		0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
 		0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
 		0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
-		E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
+		18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
 		18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
 		18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
 		7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
 		7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */; };
 /* End PBXBuildFile section */
 /* Begin PBXFileReference section */
@ -31,9 +38,25 @@
 		0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
 		0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
 		0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
 		0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
 		0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "WhisperCppDemo-Bridging-Header.h"; sourceTree = "<group>"; };
 		0AAC5DC729539EB0003032C3 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whisper.cpp; sourceTree = "<group>"; };
 		0AAC5DC829539EB0003032C3 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whisper.h; sourceTree = "<group>"; };
 		0AAC5DC929539EB0003032C3 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ggml.c; sourceTree = "<group>"; };
 		0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ggml.h; sourceTree = "<group>"; };
 		0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
 		0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
-		E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
+		18ABE14C2AF555FA0044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-backend.c"; sourceTree = "<group>"; };
 		18ABE14D2AF555FA0044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend.h"; sourceTree = "<group>"; };
 		18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend-impl.h"; sourceTree = "<group>"; };
 		18ABE14F2AF555FA0044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-quants.h"; sourceTree = "<group>"; };
 		18ABE1502AF555FA0044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-impl.h"; sourceTree = "<group>"; };
 		18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
 		18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
 		18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
 		7FCB081E2ACFA04400AF3530 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-metal.h"; sourceTree = "<group>"; };
 		7FCB08252ACFA3A400AF3530 /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "ggml-metal.m"; sourceTree = "<group>"; };
 		7FCB08272ACFA48500AF3530 /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = "ggml-metal.metal"; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 /* Begin PBXFrameworksBuildPhase section */
@ -41,7 +64,6 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
 				E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@ -77,12 +99,11 @@
 		0AAC5D8E29539CCF003032C3 = {
 			isa = PBXGroup;
 			children = (
 				E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
 				0A8E48FF2954B3F100704C1B /* README.md */,
 				0AAC5DC529539E89003032C3 /* whisper.cpp */,
 				0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
 				0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
 				0AAC5D9829539CCF003032C3 /* Products */,
 				E3F92DC32AFA8E3800A6A9D4 /* Frameworks */,
 			);
 			sourceTree = "<group>";
 		};
@ -107,9 +128,42 @@
 			path = whisper.swiftui.demo;
 			sourceTree = "<group>";
 		};
 		0AAC5DA129539CD0003032C3 /* Preview Content */ = {
 			isa = PBXGroup;
 			children = (
 				0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */,
 			);
 			name = "Preview Content";
 			path = "../Preview Content";
 			sourceTree = "<group>";
 		};
 		0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
 			isa = PBXGroup;
 			children = (
 				7FCB08272ACFA48500AF3530 /* ggml-metal.metal */,
 				7FCB081E2ACFA04400AF3530 /* ggml-metal.h */,
 				7FCB08252ACFA3A400AF3530 /* ggml-metal.m */,
 				18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
 				18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
 				18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
 				18ABE1502AF555FA0044A204 /* ggml-impl.h */,
 				18ABE1512AF555FA0044A204 /* ggml-quants.c */,
 				18ABE14F2AF555FA0044A204 /* ggml-quants.h */,
 				18AED47F2AB21F2B009D854F /* ggml-alloc.c */,
 				18AED4802AB21F2B009D854F /* ggml-alloc.h */,
 				0AAC5DC929539EB0003032C3 /* ggml.c */,
 				0AAC5DCA29539EB0003032C3 /* ggml.h */,
 				0AAC5DC729539EB0003032C3 /* whisper.cpp */,
 				0AAC5DC829539EB0003032C3 /* whisper.h */,
 			);
 			name = whisper.cpp;
 			path = ../..;
 			sourceTree = "<group>";
 		};
 		0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
 			isa = PBXGroup;
 			children = (
 				0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */,
 				0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
 			);
 			path = whisper.cpp.swift;
@ -128,17 +182,11 @@
 			children = (
 				0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
 				0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
 				0AAC5DA129539CD0003032C3 /* Preview Content */,
 			);
 			path = "Supporting files";
 			sourceTree = "<group>";
 		};
 		E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
 			isa = PBXGroup;
 			children = (
 			);
 			name = Frameworks;
 			sourceTree = "<group>";
 		};
 /* End PBXGroup section */
 /* Begin PBXNativeTarget section */
@ -155,9 +203,6 @@
 			dependencies = (
 			);
 			name = whisper.swiftui;
 			packageProductDependencies = (
 				E3F92DC42AFA8E3800A6A9D4 /* whisper */,
 			);
 			productName = WhisperCppDemo;
 			productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
 			productType = "com.apple.product-type.application";
@ -202,6 +247,7 @@
 			buildActionMask = 2147483647;
 			files = (
 				0AA751482953AC2E001EE061 /* samples in Resources */,
 				0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */,
 				0A8E49002954B3F100704C1B /* README.md in Resources */,
 				0AA751492953AC2E001EE061 /* models in Resources */,
 				0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
@ -217,10 +263,17 @@
 			files = (
 				0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
 				0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
 				0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
 				18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
 				0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
 				7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */,
 				0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
 				0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
 				0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
 				0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
 				7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */,
 				18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
 				18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@ -348,7 +401,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
-				DEVELOPMENT_TEAM = "";
+				DEVELOPMENT_TEAM = P8JZH34X63;
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_PREVIEWS = YES;
 				GENERATE_INFOPLIST_FILE = YES;
@ -372,6 +425,7 @@
 				SDKROOT = auto;
 				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
 				SWIFT_EMIT_LOC_STRINGS = YES;
 				SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 				SWIFT_VERSION = 5.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
@ -388,7 +442,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
-				DEVELOPMENT_TEAM = "";
+				DEVELOPMENT_TEAM = P8JZH34X63;
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_PREVIEWS = YES;
 				GENERATE_INFOPLIST_FILE = YES;
@ -417,6 +471,7 @@
 				SDKROOT = auto;
 				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
 				SWIFT_EMIT_LOC_STRINGS = YES;
 				SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
 				SWIFT_VERSION = 5.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
 			};
@ -444,13 +499,6 @@
 			defaultConfigurationName = Release;
 		};
 /* End XCConfigurationList section */
 /* Begin XCSwiftPackageProductDependency section */
 		E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
 			isa = XCSwiftPackageProductDependency;
 			productName = whisper;
 		};
 /* End XCSwiftPackageProductDependency section */
 	};
 	rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
 }
--- a/extra/bench-all.sh
+++ b/extra/bench-all.sh
@ -18,11 +18,11 @@ else
 fi
 models=(                                               \
-      "tiny"   "tiny-q4_0"   "tiny-q4_1"   "tiny-q5_0"   "tiny-q5_1"   "tiny-q8_0" \
+      "tiny"   "tiny-q5_0"   "tiny-q5_1"   "tiny-q8_0" \
-      "base"   "base-q4_0"   "base-q4_1"   "base-q5_0"   "base-q5_1"   "base-q8_0" \
+      "base"   "base-q5_0"   "base-q5_1"   "base-q8_0" \
-     "small"  "small-q4_0"  "small-q4_1"  "small-q5_0"  "small-q5_1"  "small-q8_0" \
+     "small"  "small-q5_0"  "small-q5_1"  "small-q8_0" \
-    "medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0" \
+    "medium" "medium-q5_0" "medium-q5_1" "medium-q8_0" \
-     "large"  "large-q4_0"  "large-q4_1"  "large-q5_0"  "large-q5_1"  "large-q8_0" \
+     "large"  "large-q5_0"  "large-q5_1"  "large-q8_0" \
 )
 if [ "$encoder_only" -eq 0 ]; then
@ -83,10 +83,6 @@ for model in "${models[@]}"; do
        config="$config COREML"
    fi
    if [[ $system_info == *"CUDA = 1"* ]]; then
        config="$config CUDA"
    fi
    if [[ $system_info == *"METAL = 1"* ]]; then
        config="$config METAL"
    fi
--- a/extra/quantize-all.sh
+++ b/extra/quantize-all.sh
@ -15,13 +15,33 @@ declare -a filedex
 cd `dirname $0`
 cd ../
-for i in `ls ./models | grep ^ggml-.*.bin | grep -v "\-q"`; do
+# Let's loop across all the objects in the 'models' dir:
-    m="models/$i"
+for i in ./models/*; do
-    if [ -f "$m" ]; then
+    # Check to see if it's a file or directory
-        if [ "${m##*.}" == "bin" ]; then
+    if [ -d "$i" ]; then
-            ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
+        # It's a directory! We should make sure it's not empty first:
-            ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
+        if [ "$(ls -A $i)" ]; then
-            filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
+            # Passed! Let's go searching for bin files (shouldn't need to go more than a layer deep here)
            # Quantize every "*.bin" file found directly inside the sub-directory "$i".
            # NOTE(review): if the directory holds no "*.bin" file the glob stays unexpanded and
            # ./quantize is invoked on the literal pattern - confirm whether that matters.
            for f in "$i"/*.bin; do
                # newfile = the part of the file name (directory stripped) before the first "_".
                newfile=`echo "${f##*/}" | cut -d _ -f 1`;
                # Skip files whose name is "q5" up to the first underscore (e.g. "q5_1.bin").
                if [ "$newfile" != "q5" ]; then
                    # "${i:-4}" is the use-default expansion, so it yields "$i" unchanged here.
                    # "${i:9:${#i}-4}" drops the first 9 characters (the length of "./models/") and
                    # therefore leaves the directory name. Output path: "$i/<dirname>-<qtype>.bin".
                    # NOTE(review): the output name does not depend on "$f", so several "*.bin"
                    # files in one directory would write to the same two outputs.
                    ./quantize "${f}" "${i:-4}/${i:9:${#i}-4}-${qtype1}.bin" ${qtype1};
                    ./quantize "${f}" "${i:-4}/${i:9:${#i}-4}-${qtype0}.bin" ${qtype0};
                    # Record both generated files in the filedex array.
                    filedex+=( "${i:-4}/${i:9:${#i}-4}-${qtype1}.bin" "${i:-4}/${i:9:${#i}-4}-${qtype0}.bin" )
                fi
            done
        fi
    else
        # It's a file! Let's make sure it's the right type:
        # Only files with a ".bin" extension are model candidates.
        if [ "${i##*.}" == "bin" ]; then
            # Skip the test fixtures: "$i" always starts with the 9-character prefix "./models/",
            # so "${i:9:8}" is the first 8 characters of the file name.
            if [ "${i:9:8}" != "for-test" ]; then
                # Strip the ".bin" suffix to build the output names. The previous "${i:-4}" was
                # the use-default expansion (it yields "$i" unchanged), which produced names such
                # as "ggml-base.bin-q5_1.bin" instead of "ggml-base-q5_1.bin".
                out_base="${i%.bin}"
                ./quantize "${i}" "${out_base}-${qtype1}.bin" ${qtype1};
                ./quantize "${i}" "${out_base}-${qtype0}.bin" ${qtype0};
                # Record both generated files in the filedex array.
                filedex+=( "${out_base}-${qtype1}.bin" "${out_base}-${qtype0}.bin" )
            fi
        fi
    fi
 done
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@ -4476,13 +4476,6 @@ static __device__ void cpy_1_f32_f16(const char * cxi, char * cdsti) {
    *dsti = __float2half(*xi);
 }
 // Element copier for f16 -> f16: reads one half from cxi and stores it unchanged at cdsti.
 static __device__ void cpy_1_f16_f16(const char * cxi, char * cdsti) {
    *((half *) cdsti) = *((const half *) cxi);
 }
 template <cpy_kernel_t cpy_1>
 static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne,
                                   const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
@ -4736,25 +4729,6 @@ static __global__ void clamp_f32(const float * x, float * dst, const float min,
    dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
 }
 // im2col kernel: each thread writes exactly one half element of dst.
 // The input coordinate is derived from the block index (scaled by the stride s0/s1), the thread
 // index (scaled by the dilation d0/d1) and the padding p0/p1; coordinates that fall outside the
 // IW x IH input produce a zero.
 static  __global__ void im2col_f32_f16(
        const float * x, half * dst,
        int ofs0, int ofs1, int IW, int IH, int CHW,
        int s0, int s1, int p0, int p1, int d0, int d1) {
    // input column / row addressed by this thread (may be out of range because of padding)
    const int iiw = blockIdx.z * s0 + threadIdx.z * d0 - p0;
    const int iih = blockIdx.y * s1 + threadIdx.y * d1 - p1;
    // destination element = row * CHW + column
    const unsigned int dst_row = threadIdx.x * gridDim.y * gridDim.z + blockIdx.y * gridDim.z + blockIdx.z;
    const unsigned int dst_col = blockIdx.x * (blockDim.y * blockDim.z) + threadIdx.y * blockDim.z + threadIdx.z;
    const int offset_dst = dst_row * CHW + dst_col;
    const bool inside = iih >= 0 && iih < IH && iiw >= 0 && iiw < IW;
    if (inside) {
        // ofs0 / ofs1 are the element strides applied to threadIdx.x / blockIdx.x
        const int offset_src =  threadIdx.x * ofs0 + blockIdx.x * ofs1;
        dst[offset_dst] = __float2half(x[offset_src + iih * IW + iiw]);
    } else {
        dst[offset_dst] = __float2half(0.0f);
    }
 }
 template<int qk, int qr, dequantize_kernel_t dq>
 static void get_rows_cuda(const void * x, const int32_t * y, float * dst, const int nrows, const int ncols, cudaStream_t stream) {
    const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1);
@ -5644,16 +5618,6 @@ static void ggml_cpy_f32_f16_cuda(
        (cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12);
 }
 // Launches the generic copy kernel with the f16 -> f16 element copier on `stream`,
 // using enough blocks of CUDA_CPY_BLOCK_SIZE threads to cover all `ne` elements.
 static void ggml_cpy_f16_f16_cuda(
    const char * cx, char * cdst, const int ne,
    const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
    const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, cudaStream_t stream) {
    // ceil(ne / CUDA_CPY_BLOCK_SIZE)
    const int n_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
    cpy_f32_f16<cpy_1_f16_f16><<<n_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>(
        cx, cdst, ne,
        ne00, ne01, nb00, nb01, nb02,
        ne10, ne11, nb10, nb11, nb12);
 }
 static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
    const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
    scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
@ -5737,15 +5701,6 @@ static void soft_max_f32_cuda(const float * x, float * dst, const int ncols_x, c
    soft_max_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols_x);
 }
 // Host-side launcher for the im2col_f32_f16 kernel on `stream`.
 // Grid is (IC, OH, OW) blocks, each block is (N, KH, KW) threads.
 // NOTE(review): N * KH * KW is used directly as the block size and is not checked against the
 // device's threads-per-block limit here - confirm callers keep it in range.
 static void im2col_f32_f16_cuda(const float * x, half * dst,
    int OH, int IW, int IH, int OW, int IC,
    int KH, int KW, int N,  int ofs0, int ofs1,
    int s0, int s1, int p0, int p1, int d0, int d1, cudaStream_t stream) {
    const int CHW = IC * KH * KW;
    const dim3 grid (IC, OH, OW);
    const dim3 block(N,  KH, KW);
    im2col_f32_f16<<<grid, block, 0, stream>>>(x, dst, ofs0, ofs1, IW, IH, CHW, s0, s1, p0, p1, d0, d1);
 }
 // buffer pool for cuda
 #define MAX_CUDA_BUFFERS 256
@ -6528,7 +6483,7 @@ inline void ggml_cuda_op_mul_mat_cublas(
            src1_as_f16 = (half *) ggml_cuda_pool_malloc_async(ne * sizeof(half), &src1_as, id, stream);
            to_fp16_cuda(src1_ddf_i, src1_as_f16, ne, stream);
        }
-        const half * src1_ptr = src1->type == GGML_TYPE_F16 ? (const half *) src1_ddf_i : src1_as_f16;
+        const half * src1_ptr = src1->type == GGML_TYPE_F16 ? (const half *) src1_ddq_i : src1_as_f16;
        size_t dst_f16_as = 0;
        half * dst_f16 = (half *) ggml_cuda_pool_malloc_async(row_diff*src1_ncols * sizeof(half), &dst_f16_as, id, stream);
@ -6704,45 +6659,6 @@ inline void ggml_cuda_op_alibi(
    (void) src1_dd;
 }
 // CUDA implementation of the im2col op.
 // Requires src0 = F16, src1 = F32 and dst = F16. Kernel extents (KW, KH) are read from src0,
 // input extents (IW, IH, IC, N) from src1 and output extents (OW, OH) from dst; the result is
 // written through dst_dd reinterpreted as half. src0_dd is not used.
 inline void ggml_cuda_op_im2col(
    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
    const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
    GGML_ASSERT(src0->type == GGML_TYPE_F16);
    GGML_ASSERT(src1->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F16);
    // op_params layout: [s0, s1, p0, p1, d0, d1, is_2D]
    const int32_t * opts = (const int32_t *) dst->op_params;
    const int32_t s0 = opts[0];
    const int32_t s1 = opts[1];
    const int32_t p0 = opts[2];
    const int32_t p1 = opts[3];
    const int32_t d0 = opts[4];
    const int32_t d1 = opts[5];
    const bool is_2D = opts[6] == 1;
    // dimension indices of N and IC inside src1 (they depend on the 1D / 2D mode)
    const int dim_n  = is_2D ? 3 : 2;
    const int dim_ic = is_2D ? 2 : 1;
    const int64_t N  = src1->ne[dim_n];
    const int64_t IC = src1->ne[dim_ic];
    const int64_t IH = is_2D ? src1->ne[1] : 1;
    const int64_t IW =         src1->ne[0];
    const int64_t KH = is_2D ? src0->ne[1] : 1;
    const int64_t KW =         src0->ne[0];
    const int64_t OH = is_2D ? dst->ne[2] : 1;
    const int64_t OW =         dst->ne[1];
    // nb[] holds byte offsets and src1 is float32, so divide to get element offsets
    const size_t ofs0 = src1->nb[dim_n]  / sizeof(float);
    const size_t ofs1 = src1->nb[dim_ic] / sizeof(float);
    half * dst_f16 = (half *) dst_dd;
    im2col_f32_f16_cuda(src1_dd, dst_f16,
        OH, IW, IH, OW, IC, KH, KW, N,
        ofs0, ofs1, s0, s1, p0, p1, d0, d1, main_stream);
    (void) src0;
    (void) src0_dd;
 }
 inline void ggml_cuda_op_diag_mask_inf(
    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
    const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
@ -7633,9 +7549,6 @@ static void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, gg
    } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) {
        ggml_cpy_f32_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
                              ne10, ne11, nb10, nb11, nb12, main_stream);
    } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
        ggml_cpy_f16_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
                              ne10, ne11, nb10, nb11, nb12, main_stream);
    } else {
        fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
                ggml_type_name(src0->type), ggml_type_name(src1->type));
@ -7667,10 +7580,6 @@ static void ggml_cuda_alibi(const ggml_tensor * src0, const ggml_tensor * src1,
    ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi);
 }
 // Thin wrapper: runs ggml_cuda_op_im2col on (src0, src1, dst) through ggml_cuda_op_flatten.
 void ggml_cuda_im2col(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_im2col);
 }
 static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    (void) src0;
    (void) src1;
@ -8034,9 +7943,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
        case GGML_OP_ALIBI:
            func = ggml_cuda_alibi;
            break;
        case GGML_OP_IM2COL:
            func = ggml_cuda_im2col;
            break;
        default:
            return false;
    }
--- a/ggml-metal.h
+++ b/ggml-metal.h
@ -26,7 +26,7 @@
 #include <stdbool.h>
 // max memory buffers that can be mapped to the device
-#define GGML_METAL_MAX_BUFFERS 64
+#define GGML_METAL_MAX_BUFFERS 16
 #define GGML_METAL_MAX_COMMAND_BUFFERS 32
 struct ggml_tensor;
--- a/ggml-metal.m
+++ b/ggml-metal.m
@ -86,7 +86,6 @@ struct ggml_metal_context {
    GGML_METAL_DECL_KERNEL(rms_norm);
    GGML_METAL_DECL_KERNEL(norm);
    GGML_METAL_DECL_KERNEL(mul_mv_f32_f32);
    GGML_METAL_DECL_KERNEL(mul_mv_f16_f16);
    GGML_METAL_DECL_KERNEL(mul_mv_f16_f32);
    GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_1row);
    GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_l4);
@ -115,7 +114,6 @@ struct ggml_metal_context {
    GGML_METAL_DECL_KERNEL(rope_f32);
    GGML_METAL_DECL_KERNEL(rope_f16);
    GGML_METAL_DECL_KERNEL(alibi_f32);
    GGML_METAL_DECL_KERNEL(im2col_f16);
    GGML_METAL_DECL_KERNEL(cpy_f32_f16);
    GGML_METAL_DECL_KERNEL(cpy_f32_f32);
    GGML_METAL_DECL_KERNEL(cpy_f16_f16);
@ -289,7 +287,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
        GGML_METAL_ADD_KERNEL(rms_norm);
        GGML_METAL_ADD_KERNEL(norm);
        GGML_METAL_ADD_KERNEL(mul_mv_f32_f32);
        GGML_METAL_ADD_KERNEL(mul_mv_f16_f16);
        GGML_METAL_ADD_KERNEL(mul_mv_f16_f32);
        GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_1row);
        GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_l4);
@ -320,7 +317,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
        GGML_METAL_ADD_KERNEL(rope_f32);
        GGML_METAL_ADD_KERNEL(rope_f16);
        GGML_METAL_ADD_KERNEL(alibi_f32);
        GGML_METAL_ADD_KERNEL(im2col_f16);
        GGML_METAL_ADD_KERNEL(cpy_f32_f16);
        GGML_METAL_ADD_KERNEL(cpy_f32_f32);
        GGML_METAL_ADD_KERNEL(cpy_f16_f16);
@ -390,7 +386,6 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
    GGML_METAL_DEL_KERNEL(rms_norm);
    GGML_METAL_DEL_KERNEL(norm);
    GGML_METAL_DEL_KERNEL(mul_mv_f32_f32);
    GGML_METAL_DEL_KERNEL(mul_mv_f16_f16);
    GGML_METAL_DEL_KERNEL(mul_mv_f16_f32);
    GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_1row);
    GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_l4);
@ -421,7 +416,6 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
    GGML_METAL_DEL_KERNEL(rope_f32);
    GGML_METAL_DEL_KERNEL(rope_f16);
    GGML_METAL_DEL_KERNEL(alibi_f32);
    GGML_METAL_DEL_KERNEL(im2col_f16);
    GGML_METAL_DEL_KERNEL(cpy_f32_f16);
    GGML_METAL_DEL_KERNEL(cpy_f32_f32);
    GGML_METAL_DEL_KERNEL(cpy_f16_f16);
@ -479,10 +473,6 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
    const int64_t tsize = ggml_nbytes(t);
    if (t->buffer && t->buffer->backend && t->buffer->backend->context) {
        ctx = t->buffer->backend->context;
    }
    // find the view that contains the tensor fully
    for (int i = 0; i < ctx->n_buffers; ++i) {
        const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
@ -1149,7 +1139,6 @@ void ggml_metal_graph_compute(
                                switch (src0t) {
                                    case GGML_TYPE_F32:
                                        {
                                            GGML_ASSERT(src1t == GGML_TYPE_F32);
                                            [encoder setComputePipelineState:ctx->pipeline_mul_mv_f32_f32];
                                            nrows = 4;
                                        } break;
@ -1157,18 +1146,13 @@ void ggml_metal_graph_compute(
                                        {
                                            nth0 = 32;
                                            nth1 = 1;
-                                            if (src1t == GGML_TYPE_F32) {
+                                            if (ne11 * ne12 < 4) {
-                                                if (ne11 * ne12 < 4) {
+                                                [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_1row];
-                                                    [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_1row];
+                                            } else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) {
-                                                } else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) {
+                                                [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_l4];
-                                                    [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_l4];
+                                                nrows = ne11;
                                                    nrows = ne11;
                                                } else {
                                                    [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32];
                                                    nrows = 4;
                                                }
                                            } else {
-                                                [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f16];
+                                                [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32];
                                                nrows = 4;
                                            }
                                        } break;
@ -1480,58 +1464,6 @@ void ggml_metal_graph_compute(
                            [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                        } break;
                    // Encode the im2col kernel: src1 (F32 input) is unfolded into dst (F16).
                    // src0 only contributes its type and the kernel extents KW / KH.
                    case GGML_OP_IM2COL:
                        {
                            GGML_ASSERT(src0->type == GGML_TYPE_F16);
                            GGML_ASSERT(src1->type == GGML_TYPE_F32);
                            GGML_ASSERT( dst->type == GGML_TYPE_F16);
                            // op_params layout: [s0, s1, p0, p1, d0, d1, is_2D]
                            const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
                            const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
                            const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
                            const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
                            const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
                            const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
                            const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
                            // extents are narrowed to int32_t because the kernel takes int32_t arguments
                            const int32_t N  = src1->ne[is_2D ? 3 : 2];
                            const int32_t IC = src1->ne[is_2D ? 2 : 1];
                            const int32_t IH = is_2D ? src1->ne[1] : 1;
                            const int32_t IW =         src1->ne[0];
                            const int32_t KH = is_2D ? src0->ne[1] : 1;
                            const int32_t KW =         src0->ne[0];
                            const int32_t OH = is_2D ? dst->ne[2] : 1;
                            const int32_t OW =         dst->ne[1];
                            const int32_t CHW = IC * KH * KW;
                            // nb[] is divided by 4 (presumably sizeof(float), since src1 is asserted F32) to get element offsets
                            const int32_t ofs0 = src1->nb[is_2D ? 3 : 2] / 4;
                            const int32_t ofs1 = src1->nb[is_2D ? 2 : 1] / 4;
                            // NOTE(review): the assert above already requires src0 to be F16, so the
                            // F32 and default arms below cannot be reached when asserts are enabled
                            switch (src0->type) {
                                case GGML_TYPE_F32: GGML_ASSERT(false && "not implemented"); break;
                                case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_im2col_f16]; break;
                                default: GGML_ASSERT(false);
                            };
                            // argument indices 0..12 are passed in this order:
                            // x, dst, ofs0, ofs1, IW, IH, CHW, s0, s1, p0, p1, d0, d1
                            [encoder setBuffer:id_src1 offset:offs_src1        atIndex:0];
                            [encoder setBuffer:id_dst  offset:offs_dst         atIndex:1];
                            [encoder setBytes:&ofs0    length:sizeof( int32_t) atIndex:2];
                            [encoder setBytes:&ofs1    length:sizeof( int32_t) atIndex:3];
                            [encoder setBytes:&IW      length:sizeof( int32_t) atIndex:4];
                            [encoder setBytes:&IH      length:sizeof( int32_t) atIndex:5];
                            [encoder setBytes:&CHW     length:sizeof( int32_t) atIndex:6];
                            [encoder setBytes:&s0      length:sizeof( int32_t) atIndex:7];
                            [encoder setBytes:&s1      length:sizeof( int32_t) atIndex:8];
                            [encoder setBytes:&p0      length:sizeof( int32_t) atIndex:9];
                            [encoder setBytes:&p1      length:sizeof( int32_t) atIndex:10];
                            [encoder setBytes:&d0      length:sizeof( int32_t) atIndex:11];
                            [encoder setBytes:&d1      length:sizeof( int32_t) atIndex:12];
                            // grid: (IC, OH, OW) threadgroups, each of (N, KH, KW) threads
                            // NOTE(review): N * KH * KW is not checked against the pipeline's
                            // maximum threads per threadgroup here - confirm it stays in range
                            [encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
                        } break;
                    case GGML_OP_DUP:
                    case GGML_OP_CPY:
                    case GGML_OP_CONT:
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@ -792,7 +792,7 @@ kernel void kernel_mul_mv_f32_f32(
        constant   int64_t & ne0,
        constant   int64_t & ne1,
        uint3 tgpig[[threadgroup_position_in_grid]],
-        uint  tiisg[[thread_index_in_simdgroup]]) {
+        uint tiisg[[thread_index_in_simdgroup]]) {
    const int64_t r0 = tgpig.x;
    const int64_t rb = tgpig.y*N_F32_F32;
@ -844,79 +844,6 @@ kernel void kernel_mul_mv_f32_f32(
    }
 }
 #define N_F16_F16 4
 // Matrix-vector style product with both operands read as half and the result written as float:
 //   dst[im, r1, r0] = sum_i x[i] * y[i]
 // where x is row r0 of src0 and y is row r1 of src1 (rows are located through the byte strides
 // nb01/nb02 and nb11/nb12). One threadgroup handles src0 row tgpig.x, up to N_F16_F16
 // consecutive src1 rows starting at tgpig.y*N_F16_F16, and batch index tgpig.z.
 // ne01, nb00, ne10 and nb10 are part of the argument list but are not read in the body.
 kernel void kernel_mul_mv_f16_f16(
        device const  char * src0,
        device const  char * src1,
        device       float * dst,
        constant   int64_t & ne00,
        constant   int64_t & ne01,
        constant   int64_t & ne02,
        constant  uint64_t & nb00,
        constant  uint64_t & nb01,
        constant  uint64_t & nb02,
        constant   int64_t & ne10,
        constant   int64_t & ne11,
        constant   int64_t & ne12,
        constant  uint64_t & nb10,
        constant  uint64_t & nb11,
        constant  uint64_t & nb12,
        constant   int64_t & ne0,
        constant   int64_t & ne1,
        uint3 tgpig[[threadgroup_position_in_grid]],
        uint  tiisg[[thread_index_in_simdgroup]]) {
    const int64_t r0 = tgpig.x;            // src0 row
    const int64_t rb = tgpig.y*N_F16_F16;  // first src1 row of this threadgroup
    const int64_t im = tgpig.z;            // batch index (indexes src1 and dst)
    // im/(ne12/ne02) maps the src1 batch index onto a src0 batch index
    // (presumably src0 is broadcast across ne12/ne02 src1 batches - confirm)
    device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
    if (ne00 < 128) {
        // short rows: scalar loop, each thread takes every 32nd element
        // NOTE(review): the stride of 32 assumes a simdgroup of 32 threads - confirm
        for (int row = 0; row < N_F16_F16; ++row) {
            int r1 = rb + row;
            if (r1 >= ne11) {
                break;
            }
            device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
            float sumf = 0;
            for (int i = tiisg; i < ne00; i += 32) {
                // both factors are half; the product is then added to the float accumulator
                sumf += (half) x[i] * (half) y[i];
            }
            // combine the per-thread partial sums; only thread 0 stores the result
            float all_sum = simd_sum(sumf);
            if (tiisg == 0) {
                dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
            }
        }
    } else {
        // long rows: same computation, reading the rows four halves at a time
        device const half4 * x4 = (device const half4 *)x;
        for (int row = 0; row < N_F16_F16; ++row) {
            int r1 = rb + row;
            if (r1 >= ne11) {
                break;
            }
            device const half  * y  = (device const half  *) (src1 + r1*nb11 + im*nb12);
            device const half4 * y4 = (device const half4 *) y;
            float sumf = 0;
            for (int i = tiisg; i < ne00/4; i += 32) {
                for (int k = 0; k < 4; ++k) sumf += (half) x4[i][k] * y4[i][k];
            }
            float all_sum = simd_sum(sumf);
            if (tiisg == 0) {
                // thread 0 adds the ne00 % 4 trailing elements not covered by the half4 loop
                for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (half) x[i] * y[i];
                dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
            }
        }
    }
 }
 kernel void kernel_mul_mv_f16_f32_1row(
        device const  char * src0,
        device const  char * src1,
@ -1302,39 +1229,6 @@ kernel void kernel_rope(
 template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
 template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
 // im2col kernel: each thread writes exactly one half element of dst.
 // The input coordinate is derived from the threadgroup position (scaled by the stride s0/s1),
 // the thread position (scaled by the dilation d0/d1) and the padding p0/p1; coordinates outside
 // the IW x IH input produce a zero.
 kernel void kernel_im2col_f16(
        device const float * x,
        device       half * dst,
        constant   int32_t & ofs0,
        constant   int32_t & ofs1,
        constant   int32_t & IW,
        constant   int32_t & IH,
        constant   int32_t & CHW,
        constant   int32_t & s0,
        constant   int32_t & s1,
        constant   int32_t & p0,
        constant   int32_t & p1,
        constant   int32_t & d0,
        constant   int32_t & d1,
        uint3 tgpig[[threadgroup_position_in_grid]],
        uint3  tgpg[[threadgroups_per_grid]],
        uint3 tpitg[[thread_position_in_threadgroup]],
        uint3   ntg[[threads_per_threadgroup]]) {
    // input column / row addressed by this thread (may be out of range because of padding)
    const int32_t iiw = tgpig.z * s0 + tpitg.z * d0 - p0;
    const int32_t iih = tgpig.y * s1 + tpitg.y * d1 - p1;
    // destination element = row * CHW + column
    const uint dst_row = tpitg.x * tgpg.y * tgpg.z + tgpig.y * tgpg.z + tgpig.z;
    const uint dst_col = tgpig.x * (ntg.y * ntg.z) + tpitg.y * ntg.z + tpitg.z;
    const int32_t offset_dst = dst_row * CHW + dst_col;
    const bool inside = iih >= 0 && iih < IH && iiw >= 0 && iiw < IW;
    if (inside) {
        // ofs0 / ofs1 are the element strides applied to tpitg.x / tgpig.x
        const int32_t offset_src = tpitg.x * ofs0 + tgpig.x * ofs1;
        dst[offset_dst] = x[offset_src + iih * IW + iiw];
    } else {
        dst[offset_dst] = 0.0f;
    }
 }
 kernel void kernel_cpy_f16_f16(
        device const half * src0,
        device       half * dst,
--- a/ggml.c
+++ b/ggml.c
--- a/ggml.h
+++ b/ggml.h
@ -403,8 +403,13 @@ extern "C" {
        GGML_OP_ROPE_BACK,
        GGML_OP_ALIBI,
        GGML_OP_CLAMP,
        GGML_OP_CONV_1D,
        GGML_OP_CONV_1D_STAGE_0,  // internal
        GGML_OP_CONV_1D_STAGE_1,  // internal
        GGML_OP_CONV_TRANSPOSE_1D,
-        GGML_OP_IM2COL,
+        GGML_OP_CONV_2D,
        GGML_OP_CONV_2D_STAGE_0, // internal
        GGML_OP_CONV_2D_STAGE_1, // internal
        GGML_OP_CONV_TRANSPOSE_2D,
        GGML_OP_POOL_1D,
        GGML_OP_POOL_2D,
@ -1393,18 +1398,6 @@ extern "C" {
            float                 min,
            float                 max);
    // im2col: builds the GGML_OP_IM2COL tensor from a and b
    // NOTE(review): going by the CUDA / Metal backends in this change set, a is presumably the
    //               kernel (F16) and b the data (F32), s0/s1 the strides, p0/p1 the paddings and
    //               d0/d1 the dilations along dimension 0/1 - confirm against ggml.c
    // is_2D selects between the 1D and the 2D layout
    GGML_API struct ggml_tensor * ggml_im2col(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                  s0,
            int                  s1,
            int                  p0,
            int                  p1,
            int                  d0,
            int                  d1,
            bool                 is_2D);
    GGML_API struct ggml_tensor * ggml_conv_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
--- a/models/convert-whisper-to-coreml.py
+++ b/models/convert-whisper-to-coreml.py
@ -252,7 +252,7 @@ class WhisperANE(Whisper):
 def convert_encoder(hparams, model, quantize=False):
    model.eval()
-    input_shape = (1, hparams.n_mels, 3000)
+    input_shape = (1, 80, 3000)
    input_data = torch.randn(input_shape)
    traced_model = torch.jit.trace(model, input_data)
@ -302,7 +302,7 @@ if __name__ == "__main__":
    parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
    args = parser.parse_args()
-    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-v1", "large-v2"]:
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
        raise ValueError("Invalid model name")
    whisper = load_model(args.model).cpu()
--- a/models/convert-whisper-to-openvino.py
+++ b/models/convert-whisper-to-openvino.py
@ -9,7 +9,7 @@ import shutil
 def convert_encoder(hparams, encoder, mname):
    encoder.eval()
-    mel = torch.zeros((1, hparams.n_mels, 3000))
+    mel = torch.zeros((1, 80, 3000))
    onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder")
--- a/spm-headers/ggml.h
+++ b/spm-headers/ggml.h
@ -1 +0,0 @@
 ../ggml.h
--- a/spm-headers/whisper.h
+++ b/spm-headers/whisper.h
@ -1 +0,0 @@
 ../whisper.h
--- a/whisper.cpp
+++ b/whisper.cpp
--- a/whisper.h
+++ b/whisper.h
@ -1,8 +1,6 @@
 #ifndef WHISPER_H
 #define WHISPER_H
 #include "ggml.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
@ -112,15 +110,15 @@ extern "C" {
    // Various functions for loading a ggml whisper model.
    // Allocate (almost) all memory needed for the model.
    // Return NULL on failure
-    WHISPER_API struct whisper_context * whisper_init_from_file_with_params  (const char * path_model,              struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size,    struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_with_params            (struct whisper_model_loader * loader, struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params);
    // These are the same as the above, but the internal state of the context is not allocated automatically
    // It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
-    WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state  (const char * path_model,              struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size,    struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
-    WHISPER_API struct whisper_context * whisper_init_with_params_no_state            (struct whisper_model_loader * loader, struct whisper_context_params params);
+    WHISPER_API struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params);
    WHISPER_DEPRECATED(
        WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
@ -572,7 +570,8 @@ extern "C" {
    // Control logging output; default behavior is to print to stderr
-    WHISPER_API void whisper_log_set(ggml_log_callback log_callback, void * user_data);
+    typedef void (*whisper_log_callback)(const char * line);
    WHISPER_API void whisper_set_log_callback(whisper_log_callback callback);
 #ifdef __cplusplus
 }
Author	SHA1	Message	Date
Georgi Gerganov	40be74271f	models : update readme	2023-11-07 13:53:01 +02:00
Georgi Gerganov	a0c0d08d0f	bench : fix n_mels	2023-11-07 13:45:56 +02:00
Georgi Gerganov	8fb0a1cd1c	bench : fix build + fix go bindings	2023-11-07 13:20:02 +02:00
Georgi Gerganov	185d3fd6d9	whisper : add support for large v3	2023-11-07 11:58:39 +02:00