tmp : demonstrate how to measure time of ggml ops

2025-07-01 23:10:47 +02:00 · 2023-03-09 09:28:06 +02:00
26 changed files with 102 additions and 978 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,5 @@
 *.o
 *.a
-*.mlmodel
-*.mlmodelc
 .cache/
 .vs/
 .vscode/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,8 +54,6 @@ if (APPLE)
    option(WHISPER_NO_AVX              "whisper: disable AVX" OFF)
    option(WHISPER_NO_AVX2             "whisper: disable AVX2" OFF)
    option(WHISPER_NO_FMA              "whisper: disable FMA" OFF)
-
-    option(WHISPER_COREML              "whisper: enable Core ML framework" OFF)
 else()
    option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
 endif()
@@ -88,33 +86,16 @@ endif()

 find_package(Threads REQUIRED)

-# on APPLE
-if (APPLE)
-    # include Accelerate framework
-    if (NOT WHISPER_NO_ACCELERATE)
-        find_library(ACCELERATE_FRAMEWORK Accelerate)
+# on APPLE - include Accelerate framework
+if (APPLE AND NOT WHISPER_NO_ACCELERATE)
+    find_library(ACCELERATE_FRAMEWORK Accelerate)
+    if (ACCELERATE_FRAMEWORK)
+        message(STATUS "Accelerate framework found")

-        if (ACCELERATE_FRAMEWORK)
-            message(STATUS "Accelerate framework found")
-
-            set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${ACCELERATE_FRAMEWORK})
-            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
-        else()
-            message(WARNING "Accelerate framework not found")
-        endif()
-    endif()
-
-    if (WHISPER_COREML)
-        find_library(FOUNDATION_FRAMEWORK Foundation)
-        find_library(COREML_FRAMEWORK CoreML)
-
-        if (COREML_FRAMEWORK)
-            message(STATUS "CoreML framework found")
-
-            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
-        else()
-            message(WARNING "CoreML framework not found")
-        endif()
+        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${ACCELERATE_FRAMEWORK})
+        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
+    else()
+        message(WARNING "Accelerate framework not found")
    endif()
 endif()

@@ -191,9 +172,7 @@ else()
            if(NOT WHISPER_NO_FMA)
                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
            endif()
-            if(NOT WHISPER_NO_F16C)
-                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
-            endif()
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
        endif()
    endif()
 endif()
@@ -202,33 +181,6 @@ if (WHISPER_PERF)
    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
 endif()

-#
-# whisper.coreml - Core ML support
-#
-
-if (WHISPER_COREML)
-    set(TARGET whisper.coreml)
-
-    add_library(${TARGET}
-        coreml/whisper-encoder.h
-        coreml/whisper-encoder.mm
-        coreml/whisper-encoder-impl.h
-        coreml/whisper-encoder-impl.m
-        )
-
-    include(DefaultTargetOptions)
-
-    target_include_directories(${TARGET} PUBLIC
-        .
-        )
-
-    target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
-
-    set_target_properties(${TARGET} PROPERTIES
-        COMPILE_FLAGS "-fobjc-arc"
-        )
-endif()
-
 #
 # whisper - this is the main library of the project
 #
@@ -248,10 +200,6 @@ target_include_directories(${TARGET} PUBLIC
    .
    )

-if (WHISPER_COREML)
-    target_link_libraries(${TARGET} PRIVATE whisper.coreml)
-endif()
-
 if (MSVC)
    target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})

--- a/Makefile
+++ b/Makefile
@@ -34,12 +34,6 @@ CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
 CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
 LDFLAGS  =

-# ref: https://github.com/ggerganov/whisper.cpp/issues/37
-ifneq ($(wildcard /usr/include/musl/*),)
-	CFLAGS   += -D_POSIX_SOURCE -D_GNU_SOURCE
-	CXXFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
-endif
-
 # OS specific
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)
@@ -138,10 +132,6 @@ ifndef WHISPER_NO_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
 endif
-ifdef WHISPER_COREML
-	CXXFLAGS += -DWHISPER_USE_COREML
-	LDFLAGS  += -framework Foundation -framework CoreML
-endif
 ifdef WHISPER_OPENBLAS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
 	LDFLAGS += -lopenblas
@@ -194,23 +184,11 @@ ggml.o: ggml.c ggml.h
 whisper.o: whisper.cpp whisper.h
 	$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o

-ifndef WHISPER_COREML
-WHISPER_OBJ = whisper.o
-else
-whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h
-	$(CXX) -O3 -I . -c coreml/whisper-encoder.mm -o whisper-encoder.o
+libwhisper.a: ggml.o whisper.o
+	$(AR) rcs libwhisper.a ggml.o whisper.o

-whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
-	$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
-
-WHISPER_OBJ = whisper.o whisper-encoder.o whisper-encoder-impl.o
-endif
-
-libwhisper.a: ggml.o $(WHISPER_OBJ)
-	$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
-
-libwhisper.so: ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
+libwhisper.so: ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)

 clean:
 	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
@@ -224,21 +202,21 @@ CC_SDL=`sdl2-config --cflags --libs`
 SRC_COMMON = examples/common.cpp
 SRC_COMMON_SDL = examples/common-sdl.cpp

-main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
+main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
 	./main -h

-stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)

-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
+command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)

-talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)

-bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
+bench: examples/bench/bench.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

 #
 # Audio samples
--- a/README.md
+++ b/README.md
@@ -466,7 +466,7 @@ The original models are converted to a custom binary format. This allows to pack
 You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
 or manually from here:

-- https://huggingface.co/ggerganov/whisper.cpp
+- https://huggingface.co/datasets/ggerganov/whisper.cpp
 - https://ggml.ggerganov.com

 For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
@@ -476,7 +476,6 @@ in [models](models).

 - [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
 - [X] Javascript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
-  - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
 - [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
 - [X] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
 - [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
@@ -486,7 +485,6 @@ in [models](models).
 - [X] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
  - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
  - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
-- [X] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)

 ## Examples

--- a/bindings/go/examples/go-model-download/main.go
+++ b/bindings/go/examples/go-model-download/main.go
@@ -17,9 +17,9 @@ import (
 // CONSTANTS

 const (
-	srcUrl  = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
-	srcExt  = ".bin"                                                      // Filename extension
-	bufSize = 1024 * 64                                                   // Size of the buffer used for downloading the model
+	srcUrl  = "https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main" // The location of the models
+	srcExt  = ".bin"                                                               // Filename extension
+	bufSize = 1024 * 64                                                            // Size of the buffer used for downloading the model
 )

 var (
--- a/coreml/whisper-encoder-impl.h
+++ b/coreml/whisper-encoder-impl.h
@@ -1,142 +0,0 @@
-//
-// CoremlEncoder.h
-//
-// This file was automatically generated and should not be edited.
-//
-
-#import <Foundation/Foundation.h>
-#import <CoreML/CoreML.h>
-#include <stdint.h>
-#include <os/log.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-
-/// Model Prediction Input Type
-API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
-@interface CoremlEncoderInput : NSObject<MLFeatureProvider>
-
-/// melSegment as 1 × 80 × 3000 3-dimensional array of floats
-@property (readwrite, nonatomic, strong) MLMultiArray * melSegment;
-- (instancetype)init NS_UNAVAILABLE;
-- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment NS_DESIGNATED_INITIALIZER;
-
-@end
-
-
-/// Model Prediction Output Type
-API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
-@interface CoremlEncoderOutput : NSObject<MLFeatureProvider>
-
-/// output as multidimensional array of floats
-@property (readwrite, nonatomic, strong) MLMultiArray * output;
-- (instancetype)init NS_UNAVAILABLE;
-- (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;
-
-@end
-
-
-/// Class for model loading and prediction
-API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
-@interface CoremlEncoder : NSObject
-@property (readonly, nonatomic, nullable) MLModel * model;
-
-/**
-    URL of the underlying .mlmodelc directory.
-*/
-+ (nullable NSURL *)URLOfModelInThisBundle;
-
-/**
-    Initialize CoremlEncoder instance from an existing MLModel object.
-
-    Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
-    Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
-*/
-- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
-
-/**
-    Initialize CoremlEncoder instance with the model in this bundle.
-*/
-- (nullable instancetype)init;
-
-/**
-    Initialize CoremlEncoder instance with the model in this bundle.
-
-    @param configuration The model configuration object
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-*/
-- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-
-/**
-    Initialize CoremlEncoder instance from the model URL.
-
-    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-*/
-- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-
-/**
-    Initialize CoremlEncoder instance from the model URL.
-
-    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
-    @param configuration The model configuration object
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-*/
-- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-
-/**
-    Construct CoremlEncoder instance asynchronously with configuration.
-    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
-
-    @param configuration The model configuration
-    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
-*/
-+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
-
-/**
-    Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
-
-    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
-
-    @param modelURL The model URL.
-    @param configuration The model configuration
-    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
-*/
-+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
-
-/**
-    Make a prediction using the standard interface
-    @param input an instance of CoremlEncoderInput to predict from
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-    @return the prediction as CoremlEncoderOutput
-*/
-- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-
-/**
-    Make a prediction using the standard interface
-    @param input an instance of CoremlEncoderInput to predict from
-    @param options prediction options
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-    @return the prediction as CoremlEncoderOutput
-*/
-- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-
-/**
-    Make a prediction using the convenience interface
-    @param melSegment as 1 × 80 × 3000 3-dimensional array of floats:
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-    @return the prediction as CoremlEncoderOutput
-*/
-- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-
-/**
-    Batch prediction
-    @param inputArray array of CoremlEncoderInput instances to obtain predictions from
-    @param options prediction options
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-    @return the predictions as NSArray<CoremlEncoderOutput *>
-*/
-- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
-@end
-
-NS_ASSUME_NONNULL_END
--- a/coreml/whisper-encoder-impl.m
+++ b/coreml/whisper-encoder-impl.m
@@ -1,197 +0,0 @@
-//
-// CoremlEncoder.m
-//
-// This file was automatically generated and should not be edited.
-//
-
-#if !__has_feature(objc_arc)
-#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
-#endif
-
-#import "whisper-encoder-impl.h"
-
-@implementation CoremlEncoderInput
-
-- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment {
-    self = [super init];
-    if (self) {
-        _melSegment = melSegment;
-    }
-    return self;
-}
-
-- (NSSet<NSString *> *)featureNames {
-    return [NSSet setWithArray:@[@"melSegment"]];
-}
-
-- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
-    if ([featureName isEqualToString:@"melSegment"]) {
-        return [MLFeatureValue featureValueWithMultiArray:self.melSegment];
-    }
-    return nil;
-}
-
-@end
-
-@implementation CoremlEncoderOutput
-
-- (instancetype)initWithOutput:(MLMultiArray *)output {
-    self = [super init];
-    if (self) {
-        _output = output;
-    }
-    return self;
-}
-
-- (NSSet<NSString *> *)featureNames {
-    return [NSSet setWithArray:@[@"output"]];
-}
-
-- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
-    if ([featureName isEqualToString:@"output"]) {
-        return [MLFeatureValue featureValueWithMultiArray:self.output];
-    }
-    return nil;
-}
-
-@end
-
-@implementation CoremlEncoder
-
-
-/**
-    URL of the underlying .mlmodelc directory.
-*/
-+ (nullable NSURL *)URLOfModelInThisBundle {
-    NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"CoremlEncoder" ofType:@"mlmodelc"];
-    if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load CoremlEncoder.mlmodelc in the bundle resource"); return nil; }
-    return [NSURL fileURLWithPath:assetPath];
-}
-
-
-/**
-    Initialize CoremlEncoder instance from an existing MLModel object.
-
-    Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
-    Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
-*/
-- (instancetype)initWithMLModel:(MLModel *)model {
-    self = [super init];
-    if (!self) { return nil; }
-    _model = model;
-    if (_model == nil) { return nil; }
-    return self;
-}
-
-
-/**
-    Initialize CoremlEncoder instance with the model in this bundle.
-*/
-- (nullable instancetype)init {
-    return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
-}
-
-
-/**
-    Initialize CoremlEncoder instance with the model in this bundle.
-
-    @param configuration The model configuration object
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-*/
-- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
-}
-
-
-/**
-    Initialize CoremlEncoder instance from the model URL.
-
-    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-*/
-- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
-    if (model == nil) { return nil; }
-    return [self initWithMLModel:model];
-}
-
-
-/**
-    Initialize CoremlEncoder instance from the model URL.
-
-    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
-    @param configuration The model configuration object
-    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
-*/
-- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
-    if (model == nil) { return nil; }
-    return [self initWithMLModel:model];
-}
-
-
-/**
-    Construct CoremlEncoder instance asynchronously with configuration.
-    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
-
-    @param configuration The model configuration
-    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
-*/
-+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
-    [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
-              configuration:configuration
-          completionHandler:handler];
-}
-
-
-/**
-    Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
-
-    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
-
-    @param modelURL The model URL.
-    @param configuration The model configuration
-    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
-*/
-+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
-    [MLModel loadContentsOfURL:modelURL
-                 configuration:configuration
-             completionHandler:^(MLModel *model, NSError *error) {
-        if (model != nil) {
-            CoremlEncoder *typedModel = [[CoremlEncoder alloc] initWithMLModel:model];
-            handler(typedModel, nil);
-        } else {
-            handler(nil, error);
-        }
-    }];
-}
-
-- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
-}
-
-- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
-    if (!outFeatures) { return nil; }
-    return [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
-}
-
-- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    CoremlEncoderInput *input_ = [[CoremlEncoderInput alloc] initWithMelSegment:melSegment];
-    return [self predictionFromFeatures:input_ error:error];
-}
-
-- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
-    id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
-    id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
-    if (!outBatch) { return nil; }
-    NSMutableArray<CoremlEncoderOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
-    for (NSInteger i = 0; i < outBatch.count; i++) {
-        id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
-        CoremlEncoderOutput * result = [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
-        [results addObject:result];
-    }
-    return results;
-}
-
-@end
--- a/coreml/whisper-encoder.h
+++ b/coreml/whisper-encoder.h
@@ -1,22 +0,0 @@
-// Wrapper of the Core ML Whisper Encoder model
-//
-// Code is derived from the work of Github user @wangchou
-// ref: https://github.com/wangchou/callCoreMLFromCpp
-
-#if __cplusplus
-extern "C" {
-#endif
-
-struct whisper_coreml_context;
-
-struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
-void whisper_coreml_free(struct whisper_coreml_context * ctx);
-
-void whisper_coreml_encode(
-        const whisper_coreml_context * ctx,
-                               float * mel,
-                               float * out);
-
-#if __cplusplus
-}
-#endif
--- a/coreml/whisper-encoder.mm
+++ b/coreml/whisper-encoder.mm
@@ -1,61 +0,0 @@
-#import "coreml/whisper-encoder.h"
-#import "coreml/whisper-encoder-impl.h"
-
-#import <CoreML/CoreML.h>
-
-#include <stdlib.h>
-
-#if __cplusplus
-extern "C" {
-#endif
-
-struct whisper_coreml_context {
-    const void * data;
-};
-
-struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
-    NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];
-
-    NSURL * url_model = [NSURL fileURLWithPath: path_model_str];
-
-    const void * data = CFBridgingRetain([[CoremlEncoder alloc] initWithContentsOfURL:url_model error:nil]);
-
-    if (data == NULL) {
-        return NULL;
-    }
-
-    whisper_coreml_context * ctx = new whisper_coreml_context;
-
-    ctx->data = data;
-
-    return ctx;
-}
-
-void whisper_coreml_free(struct whisper_coreml_context * ctx) {
-    CFRelease(ctx->data);
-    delete ctx;
-}
-
-void whisper_coreml_encode(
-        const whisper_coreml_context * ctx,
-                               float * mel,
-                               float * out) {
-    MLMultiArray * inMultiArray = [
-        [MLMultiArray alloc] initWithDataPointer: mel
-                                           shape: @[@1, @80, @3000]
-                                        dataType: MLMultiArrayDataTypeFloat32
-                                         strides: @[@(240000), @(3000), @1]
-                                     deallocator: nil
-                                           error: nil
-    ];
-
-    CoremlEncoderOutput * outCoreML = [(__bridge id) ctx->data predictionFromMelSegment:inMultiArray error:nil];
-
-    MLMultiArray * outMA = outCoreML.output;
-
-    memcpy(out, outMA.dataPointer, outMA.count * sizeof(float));
-}
-
-#if __cplusplus
-}
-#endif
--- a/examples/addon.node/addon.cpp
+++ b/examples/addon.node/addon.cpp
@@ -292,64 +292,51 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
    return 0;
 }

-class Worker : public Napi::AsyncWorker {
- public:
-  Worker(Napi::Function& callback, whisper_params params)
-      : Napi::AsyncWorker(callback), params(params) {}
-
-  void Execute() override {
-    run(params, result);
-  }
-
-  void OnOK() override {
-    Napi::HandleScope scope(Env());
-    Napi::Object res = Napi::Array::New(Env(), result.size());
-    for (uint64_t i = 0; i < result.size(); ++i) {
-      Napi::Object tmp = Napi::Array::New(Env(), 3);
-      for (uint64_t j = 0; j < 3; ++j) {
-        tmp[j] = Napi::String::New(Env(), result[i][j]);
-      }
-      res[i] = tmp;
+Napi::Object whisper(const Napi::CallbackInfo& info) {
+    Napi::Env env = info.Env();
+    if (info.Length() <= 0 || !info[0].IsObject()) {
+        Napi::TypeError::New(env, "object expected").ThrowAsJavaScriptException();
    }
-    Callback().Call({Env().Null(), res});
-  }
+    whisper_params params;
+    std::vector<std::vector<std::string>> result;

- private:
-  whisper_params params;
-  std::vector<std::vector<std::string>> result;
-};
+    Napi::Object whisper_params = info[0].As<Napi::Object>();
+    std::string language = whisper_params.Get("language").As<Napi::String>();
+    std::string model = whisper_params.Get("model").As<Napi::String>();
+    std::string input = whisper_params.Get("fname_inp").As<Napi::String>();

+    params.language = language;
+    params.model = model;
+    params.fname_inp.emplace_back(input);

+    // run model
+    run(params, result);

-Napi::Value whisper(const Napi::CallbackInfo& info) {
-  Napi::Env env = info.Env();
-  if (info.Length() <= 0 || !info[0].IsObject()) {
-    Napi::TypeError::New(env, "object expected").ThrowAsJavaScriptException();
-  }
-  whisper_params params;
+    fprintf(stderr, "RESULT:\n");
+    for (auto sentence:result) {
+        fprintf(stderr, "t0: %s, t1: %s, content: %s \n",
+                sentence[0].c_str(), sentence[1].c_str(), sentence[2].c_str());
+    }

-  Napi::Object whisper_params = info[0].As<Napi::Object>();
-  std::string language = whisper_params.Get("language").As<Napi::String>();
-  std::string model = whisper_params.Get("model").As<Napi::String>();
-  std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
+    Napi::Object res = Napi::Array::New(env, result.size());
+    for (uint64_t i = 0; i < result.size(); ++i) {
+        Napi::Object tmp = Napi::Array::New(env, 3);
+        for (uint64_t j = 0; j < 3; ++j) {
+            tmp[j] = Napi::String::New(env, result[i][j]);
+        }
+        res[i] = tmp;
+    }

-  params.language = language;
-  params.model = model;
-  params.fname_inp.emplace_back(input);
-
-  Napi::Function callback = info[1].As<Napi::Function>();
-  Worker* worker = new Worker(callback, params);
-  worker->Queue();
-  return env.Undefined();
+    return res;
 }


 Napi::Object Init(Napi::Env env, Napi::Object exports) {
-  exports.Set(
-      Napi::String::New(env, "whisper"),
-      Napi::Function::New(env, whisper)
-  );
-  return exports;
+    exports.Set(
+            Napi::String::New(env, "whisper"),
+            Napi::Function::New(env, whisper)
+    );
+    return exports;
 }

 NODE_API_MODULE(whisper, Init);
--- a/examples/addon.node/index.js
+++ b/examples/addon.node/index.js
@ -1,36 +1,27 @@
-const path = require("path");
-const { whisper } = require(path.join(
-  __dirname,
-  "../../build/Release/whisper-addon"
-));
-const { promisify } = require("util");
-
-const whisperAsync = promisify(whisper);
+const path = require('path');
+const { whisper } = require(path.join(__dirname, '../../build/Release/whisper-addon'));

 const whisperParams = {
-  language: "en",
-  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
-  fname_inp: "../../samples/jfk.wav",
+    language: 'en',
+    model: path.join(__dirname, '../../models/ggml-base.en.bin'),
+    fname_inp: '',
 };

 const arguments = process.argv.slice(2);
 const params = Object.fromEntries(
-  arguments.reduce((pre, item) => {
-    if (item.startsWith("--")) {
-      return [...pre, item.slice(2).split("=")];
-    }
-    return pre;
-  }, [])
+    arguments.reduce((pre, item) => {
+        if (item.startsWith("--")) {
+            return [...pre, item.slice(2).split("=")];
+        }
+        return pre;
+    }, []),
 );

 for (const key in params) {
-  if (whisperParams.hasOwnProperty(key)) {
-    whisperParams[key] = params[key];
-  }
+    if (whisperParams.hasOwnProperty(key)) {
+        whisperParams[key] = params[key];
+    }
 }

-console.log("whisperParams =", whisperParams);
-
-whisperAsync(whisperParams).then((result) => {
-  console.log(`Result from whisper: ${result}`);
-});
+console.log('whisperParams =', whisperParams);
+console.log(whisper(whisperParams));
--- a/examples/main/README.md
+++ b/examples/main/README.md
@ -31,7 +31,6 @@ options:
  -osrt,     --output-srt        [false  ] output result in a srt file
  -owts,     --output-words      [false  ] output script for generating karaoke video
  -ocsv,     --output-csv        [false  ] output result in a CSV file
-  -oj,       --output-json       [false  ] output result in a JSON file
  -of FNAME, --output-file FNAME [       ] output file path (without file extension)
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -73,7 +73,6 @@ struct whisper_params {
    bool output_srt     = false;
    bool output_wts     = false;
    bool output_csv     = false;
-    bool output_jsn     = false;
    bool print_special  = false;
    bool print_colors   = false;
    bool print_progress = false;
@ -131,7 +130,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-owts" || arg == "--output-words")   { params.output_wts     = true; }
        else if (arg == "-fp"   || arg == "--font-path")      { params.font_path      = argv[++i]; }
        else if (arg == "-ocsv" || arg == "--output-csv")     { params.output_csv     = true; }
-        else if (arg == "-oj"   || arg == "--output-json")    { params.output_jsn     = true; }
        else if (arg == "-of"   || arg == "--output-file")    { params.fname_out.emplace_back(argv[++i]); }
        else if (arg == "-ps"   || arg == "--print-special")  { params.print_special  = true; }
        else if (arg == "-pc"   || arg == "--print-colors")   { params.print_colors   = true; }
@ -180,7 +178,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -owts,     --output-words      [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
    fprintf(stderr, "  -fp,       --font-path         [%-7s] path to a monospace font for karaoke video\n",     params.font_path.c_str());
    fprintf(stderr, "  -ocsv,     --output-csv        [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
-    fprintf(stderr, "  -oj,       --output-json       [%-7s] output result in a JSON file\n",                   params.output_jsn ? "true" : "false");
    fprintf(stderr, "  -of FNAME, --output-file FNAME [%-7s] output file path (without file extension)\n",      "");
    fprintf(stderr, "  -ps,       --print-special     [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
    fprintf(stderr, "  -pc,       --print-colors      [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
@ -371,129 +368,6 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
    return true;
 }

-bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
-    std::ofstream fout(fname);
-    int indent = 0;
-
-    auto doindent = [&]() {
-        for (int i = 0; i < indent; i++) fout << "\t";
-    };
-
-    auto start_arr = [&](const char *name) {
-        doindent();
-        fout << "\"" << name << "\": [\n";
-        indent++;
-    };
-
-    auto end_arr = [&](bool end = false) {
-        indent--;
-        doindent();
-        fout << (end ? "]\n" : "},\n");
-    };
-
-    auto start_obj = [&](const char *name = nullptr) {
-        doindent();
-        if (name) {
-            fout << "\"" << name << "\": {\n";
-        } else {
-            fout << "{\n";
-        }
-        indent++;
-    };
-
-    auto end_obj = [&](bool end = false) {
-        indent--;
-        doindent();
-        fout << (end ? "}\n" : "},\n");
-    };
-
-    auto start_value = [&](const char *name) {
-        doindent();
-        fout << "\"" << name << "\": ";
-    };
-
-    auto value_s = [&](const char *name, const char *val, bool end = false) {
-        start_value(name);
-        fout << "\"" << val << (end ? "\"\n" : "\",\n");
-    };
-
-    auto end_value = [&](bool end = false) {
-        fout << (end ? "\n" : ",\n");
-    };
-
-    auto value_i = [&](const char *name, const int64_t val, bool end = false) {
-        start_value(name);
-        fout << val;
-        end_value(end);
-    };
-
-    auto value_b = [&](const char *name, const bool val, bool end = false) {
-        start_value(name);
-        fout << (val ? "true" : "false");
-        end_value(end);
-    };
-
-    if (!fout.is_open()) {
-        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
-        return false;
-    }
-
-    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
-    start_obj();
-        value_s("systeminfo", whisper_print_system_info());
-        start_obj("model");
-            value_s("type", whisper_model_type_readable(ctx));
-            value_b("multilingual", whisper_is_multilingual(ctx));
-            value_i("vocab", whisper_model_n_vocab(ctx));
-            start_obj("audio");
-                value_i("ctx", whisper_model_n_audio_ctx(ctx));
-                value_i("state", whisper_model_n_audio_state(ctx));
-                value_i("head", whisper_model_n_audio_head(ctx));
-                value_i("layer", whisper_model_n_audio_layer(ctx), true);
-            end_obj();
-            start_obj("text");
-                value_i("ctx", whisper_model_n_text_ctx(ctx));
-                value_i("state", whisper_model_n_text_state(ctx));
-                value_i("head", whisper_model_n_text_head(ctx));
-                value_i("leyer", whisper_model_n_text_layer(ctx), true);
-            end_obj();
-            value_i("mels", whisper_model_n_mels(ctx));
-            value_i("f16", whisper_model_f16(ctx), true);
-        end_obj();
-        start_obj("params");
-            value_s("model", params.model.c_str());
-            value_s("language", params.language.c_str());
-            value_b("translate", params.translate, true);
-        end_obj();
-        start_obj("result");
-            value_s("language", whisper_lang_str(whisper_full_lang_id(ctx)), true);
-        end_obj();
-        start_arr("transcription");
-
-            const int n_segments = whisper_full_n_segments(ctx);
-            for (int i = 0; i < n_segments; ++i) {
-                const char * text = whisper_full_get_segment_text(ctx, i);
-                const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-                const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-                start_obj();
-                    start_obj("timestanps");
-                        value_s("from", to_timestamp(t0, true).c_str());
-                        value_s("to", to_timestamp(t1, true).c_str(), true);
-                    end_obj();
-                    start_obj("offsets");
-                        value_i("from", t0 * 10);
-                        value_i("to", t1 * 10, true);
-                    end_obj();
-                    value_s("text", text, true);
-                end_obj(i == (n_segments - 1));
-            }
-
-        end_arr(true);
-    end_obj(true);
-    return true;
-}
-
 // karaoke video generation
 // outputs a bash script that uses ffmpeg to generate a video with the subtitles
 // TODO: font parameter adjustments
@ -788,12 +662,6 @@ int main(int argc, char ** argv) {
                const auto fname_csv = fname_out + ".csv";
                output_csv(ctx, fname_csv.c_str());
            }
-
-            // output to JSON file
-            if (params.output_jsn) {
-                const auto fname_jsn = fname_out + ".json";
-                output_json(ctx, fname_jsn.c_str(), params);
-            }
        }
    }

--- a/examples/talk/README.md
+++ b/examples/talk/README.md
@ -31,7 +31,7 @@ To run this, you will need a ggml GPT-2 model: [instructions](https://github.com
 Alternatively, you can simply download the smallest ggml GPT-2 117M model (240 MB) like this:

 ```
-wget --quiet --show-progress -O models/ggml-gpt-2-117M.bin https://huggingface.co/ggerganov/ggml/raw/main/ggml-model-gpt-2-117M.bin
+wget --quiet --show-progress -O models/ggml-gpt-2-117M.bin https://huggingface.co/datasets/ggerganov/ggml/raw/main/ggml-model-gpt-2-117M.bin
 ```

 ## TTS
--- a/examples/whisper.objc/README.md
+++ b/examples/whisper.objc/README.md
@ -24,5 +24,3 @@ Also, don't forget to add the `-DGGML_USE_ACCELERATE` compiler flag in Build Pha
 This can significantly improve the performance of the transcription:

 <img width="1072" alt="image" src="https://user-images.githubusercontent.com/1991296/208511239-8d7cdbd1-aa48-41b5-becd-ca288d53cc07.png">
-
-In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project.
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
@ -296,10 +296,6 @@
 				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				MTL_FAST_MATH = YES;
-				OTHER_CFLAGS = (
-					"-O3",
-					"-DNDEBUG",
-				);
 				SDKROOT = iphoneos;
 				VALIDATE_PRODUCT = YES;
 			};
--- a/examples/whisper.swiftui/README.md
+++ b/examples/whisper.swiftui/README.md
@ -7,9 +7,8 @@ To use:
 2. Add the model to "whisper.swiftui.demo/Resources/models" via Xcode.
 3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)).
 4. Add the model to "whisper.swiftui.demo/Resources/samples" via Xcode.
-5. Select the "Release" [^2] build configuration under "Run", then deploy and run to your device.
+5. Select the "release" build configuration under "Run", then deploy and run to your device.

 [^1]: I recommend the tiny, base or small models for running on an iOS device.
-[^2]: The `Release` build can boost performance of transcription. In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project.

 ![image](https://user-images.githubusercontent.com/1991296/212539216-0aef65e4-f882-480a-8358-0f816838fd52.png)
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
@ -430,10 +430,6 @@
 				LLVM_LTO = YES;
 				MACOSX_DEPLOYMENT_TARGET = 13.0;
 				MARKETING_VERSION = 1.0;
-				OTHER_CFLAGS = (
-					"-O3",
-					"-DNDEBUG",
-				);
 				PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SDKROOT = auto;
--- a/ggml.c
+++ b/ggml.c
@ -79,7 +79,7 @@ typedef void* thread_ret_t;
 #define static_assert(cond, msg) _Static_assert(cond, msg)
 #endif

-/*#define GGML_PERF*/
+#define GGML_PERF
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16

--- a/models/README.md
+++ b/models/README.md
@ -6,7 +6,7 @@ using the [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script. You can either
 the `ggml` files yourself using the conversion script, or you can use the [download-ggml-model.sh](download-ggml-model.sh)
 script to download the already converted models. Currently, they are hosted on the following locations:

- https://huggingface.co/ggerganov/whisper.cpp
+- https://huggingface.co/datasets/ggerganov/whisper.cpp
 - https://ggml.ggerganov.com

 Sample usage:
@ -23,7 +23,7 @@ You can now use it like this:

 A third option to obtain the model files is to download them from Hugging Face:

-https://huggingface.co/ggerganov/whisper.cpp/tree/main
+https://huggingface.co/datasets/ggerganov/whisper.cpp/tree/main

 ## Available models

--- a/models/convert-h5-to-ggml.py
+++ b/models/convert-h5-to-ggml.py
@ -79,11 +79,11 @@ dir_model   = sys.argv[1]
 dir_whisper = sys.argv[2]
 dir_out     = sys.argv[3]

-with open(dir_model + "/vocab.json", "r", encoding="utf8") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
    encoder = json.load(f)
-with open(dir_model + "/added_tokens.json", "r", encoding="utf8") as f:
+with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
    encoder_added = json.load(f)
-with open(dir_model + "/config.json", "r", encoding="utf8") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

 model = WhisperForConditionalGeneration.from_pretrained(dir_model)
--- a/models/download-coreml-model.sh
+++ b/models/download-coreml-model.sh
@ -1,82 +0,0 @@
-#!/bin/bash
-
-# This script downloads Whisper model files that have already been converted to Core ML format.
-# This way you don't have to convert them yourself.
-
-src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
-pfx="resolve/main/ggml"
-
-# get the path of this script
-function get_script_path() {
-    if [ -x "$(command -v realpath)" ]; then
-        echo "$(dirname $(realpath $0))"
-    else
-        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
-        echo "$ret"
-    fi
-}
-
-models_path="$(get_script_path)"
-
-# Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
-
-# list available models
-function list_models {
-    printf "\n"
-    printf "  Available models:"
-    for model in "${models[@]}"; do
-        printf " $model"
-    done
-    printf "\n\n"
-}
-
-if [ "$#" -ne 1 ]; then
-    printf "Usage: $0 <model>\n"
-    list_models
-
-    exit 1
-fi
-
-model=$1
-
-if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
-    printf "Invalid model: $model\n"
-    list_models
-
-    exit 1
-fi
-
-# download Core ML model
-
-printf "Downloading Core ML model $model from '$src' ...\n"
-
-cd $models_path
-
-if [ -f "ggml-$model.mlmodel" ]; then
-    printf "Model $model already exists. Skipping download.\n"
-    exit 0
-fi
-
-if [ -x "$(command -v wget)" ]; then
-    wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
-elif [ -x "$(command -v curl)" ]; then
-    curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
-else
-    printf "Either wget or curl is required to download models.\n"
-    exit 1
-fi
-
-
-if [ $? -ne 0 ]; then
-    printf "Failed to download Core ML model $model \n"
-    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
-    exit 1
-fi
-
-printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
-printf "Run the following command to compile it:\n\n"
-printf "  $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
-printf "You can now use it like this:\n\n"
-printf "  $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
-printf "\n"
--- a/models/download-ggml-model.cmd
+++ b/models/download-ggml-model.cmd
@ -40,7 +40,7 @@ if exist "ggml-%model%.bin" (
  goto :eof
 )

-PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Invoke-WebRequest -Uri https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-%model%.bin -OutFile ggml-%model%.bin"
+PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Invoke-WebRequest -Uri https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-%model%.bin -OutFile ggml-%model%.bin"

 if %ERRORLEVEL% neq 0 (
  echo Failed to download ggml model %model%
--- a/models/download-ggml-model.sh
+++ b/models/download-ggml-model.sh
@ -6,7 +6,7 @@
 #src="https://ggml.ggerganov.com"
 #pfx="ggml-model-whisper"

-src="https://huggingface.co/ggerganov/whisper.cpp"
+src="https://huggingface.co/datasets/ggerganov/whisper.cpp"
 pfx="resolve/main/ggml"

 # get the path of this script
--- a/whisper.cpp
+++ b/whisper.cpp
@ -1,8 +1,5 @@
 #define WHISPER_BUILD
 #include "whisper.h"
-#if WHISPER_USE_COREML
-#include "coreml/whisper-encoder.h"
-#endif

 #include "ggml.h"

@ -589,10 +586,6 @@ struct whisper_state {

    int lang_id = 0; // english by default

-#ifdef WHISPER_USE_COREML
-    whisper_coreml_context * ctx_coreml;
-#endif
-
    // [EXPERIMENTAL] token-level timestamps data
    int64_t t_beg = 0;
    int64_t t_last = 0;
@ -638,13 +631,12 @@ struct whisper_context {
    int64_t t_load_us = 0;
    int64_t t_start_us = 0;

+
    ggml_type wtype = ggml_type::GGML_TYPE_F16; // weight type (FP32 or FP16)

    whisper_model model;
    whisper_vocab vocab;
    whisper_state * state = nullptr;
-
-    std::string path_model; // populated by whisper_init_from_file()
 };

 template<typename T>
@ -1375,7 +1367,6 @@ static bool whisper_encode_internal(
        }
    }

-#ifndef WHISPER_USE_COREML
    struct ggml_tensor * cur;

    // convolution + gelu
@ -1418,7 +1409,7 @@ static bool whisper_encode_internal(
    //}

    static int iter = 0;
-
+    
    const size_t e_pe_stride = model.e_pe->ne[0]*ggml_element_size(model.e_pe);
    const size_t e_pe_offset = model.e_pe->ne[0]*ggml_element_size(model.e_pe)*n_ctx*iter;

@ -1607,7 +1598,7 @@ static bool whisper_encode_internal(
                        ggml_repeat(ctx0, layer.mlp_ln_w, cur),
                        cur),
                    ggml_repeat(ctx0, layer.mlp_ln_b, cur));
-            }
+    }

 #ifdef WHISPER_USE_FLASH_FF
            wstate.use_buf(ctx0, 0);
@ -1647,7 +1638,7 @@ static bool whisper_encode_internal(
                ggml_repeat(ctx0, layer.mlp_1_b, cur),
                cur);
 #endif
-        }
+}

        wstate.use_buf(ctx0, 3);

@ -1684,13 +1675,6 @@ static bool whisper_encode_internal(

        //ggml_graph_print(&gf);
    }
-#else
-    wstate.use_buf(ctx0, -1);
-
-    struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
-
-    whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
-#endif

    // cur
    //{
@ -2176,6 +2160,12 @@ static bool whisper_decode_internal(
        ggml_graph_compute       (ctx0, &gf);
    }

+    // print the time for computing the last ggml_mul_mat that computes logits
+    // and the total decoder time; both go to stderr like the other diagnostics
+    // these need to be called after ggml_graph_compute()
+    fprintf(stderr, "logits t = %7.3f ms (%2d runs, N = %3d, ggml_mul_mat: [%d x %d] * [%d x %d])\n", 1e-3*double(logits->perf_time_us)/logits->perf_runs, logits->perf_runs, N, logits->ne[0], logits->ne[1], cur->ne[1], cur->ne[0]);
+    fprintf(stderr, "total  t = %7.3f ms (%2d runs)\n", 1e-3*double(gf.perf_time_us)/gf.perf_runs, gf.perf_runs);
+
    // extract logits for all N tokens
    //logits_out.resize(N*n_vocab);
    //memcpy(logits_out.data(), ggml_get_data(logits), sizeof(float)*N*n_vocab);
@ -2494,25 +2484,12 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
 // interface implementation
 //

-#ifdef WHISPER_USE_COREML
-// replace .bin with .mlmodelc
-static std::string whisper_get_coreml_path(std::string path_bin) {
-    auto pos = path_bin.rfind('.');
-    if (pos != std::string::npos) {
-        path_bin = path_bin.substr(0, pos);
-    }
-
-    path_bin += ".mlmodelc";
-
-    return path_bin;
-}
-#endif
-
 struct whisper_state * whisper_init_state(whisper_context * ctx) {
    whisper_state * state = new whisper_state;

    const size_t scale = ctx->model.hparams.f16 ? 1 : 2;

+
    if (!kv_cache_init(ctx->model.hparams, scale * MEM_REQ_KV_SELF.at(ctx->model.type), state->decoders[0].kv_self, ctx->wtype, ctx->model.hparams.n_text_ctx)) {
        fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
        return nullptr;
@ -2533,20 +2510,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
        fprintf(stderr, "%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
    }

-#ifdef WHISPER_USE_COREML
-        const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
-
-        fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
-        fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
-
-        state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
-        if (!state->ctx_coreml) {
-            fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
-            return nullptr;
-        }
-
-        fprintf(stderr, "%s: Core ML model loaded\n", __func__);
-#endif

    state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);

@ -2582,7 +2545,6 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
    }

    loader.context = &fin;
-
    loader.read = [](void * ctx, void * output, size_t read_size) {
        std::ifstream * fin = (std::ifstream*)ctx;
        fin->read((char *)output, read_size);
@ -2599,13 +2561,7 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
        fin->close();
    };

-    auto ctx = whisper_init_no_state(&loader);
-
-    if (ctx) {
-        ctx->path_model = path_model;
-    }
-
-    return ctx;
+    return whisper_init_no_state(&loader);
 }

 struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size) {
@ -2730,10 +2686,6 @@ void whisper_free(struct whisper_context * ctx) {

        whisper_free_state(ctx->state);

-#ifdef WHISPER_USE_COREML
-        whisper_coreml_free(ctx->state->ctx_coreml);
-        ctx->state->ctx_coreml = nullptr;
-#endif
        delete ctx;
    }
 }
@ -2910,7 +2862,7 @@ int whisper_lang_auto_detect_with_state(
    }

    // run the encoder
-    if (whisper_encode_with_state(ctx, state, seek, n_threads) != 0) {
+    if (whisper_encode_with_state(ctx, state, seek, n_threads) != 0) {
        fprintf(stderr, "%s: failed to encode\n", __func__);
        return -6;
    }
@ -2974,71 +2926,6 @@ int whisper_lang_auto_detect(
    return whisper_lang_auto_detect_with_state(ctx, ctx->state, offset_ms, n_threads, lang_probs);
 }

-int whisper_model_n_vocab(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_vocab;
-}
-
-int whisper_model_n_audio_ctx(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_audio_ctx;
-}
-
-int whisper_model_n_audio_state(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_audio_state;
-}
-
-int whisper_model_n_audio_head(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_audio_head;
-}
-
-int whisper_model_n_audio_layer(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_audio_layer;
-}
-
-int whisper_model_n_text_ctx(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_text_ctx;
-}
-
-int whisper_model_n_text_state(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_text_state;
-}
-
-int whisper_model_n_text_head(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_text_head;
-}
-
-int whisper_model_n_text_layer(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_text_layer;
-}
-
-int whisper_model_n_mels(struct whisper_context * ctx) {
-    return ctx->model.hparams.n_mels;
-}
-
-int whisper_model_f16(struct whisper_context * ctx) {
-    return ctx->model.hparams.f16;
-}
-
-int whisper_model_type(struct whisper_context * ctx) {
-    return ctx->model.type;
-}
-
-const char *whisper_model_type_readable(struct whisper_context * ctx) {
-    switch (ctx->model.type) {
-    case e_model::MODEL_TINY:
-        return "tiny";
-    case e_model::MODEL_BASE:
-        return "base";
-    case e_model::MODEL_SMALL:
-        return "small";
-    case e_model::MODEL_MEDIUM:
-        return "medium";
-    case e_model::MODEL_LARGE:
-        return "large";
-    default:
-        return "unknown";
-    }
-}
-
 int whisper_n_len_from_state(struct whisper_state * state) {
    return state->mel.n_len;
 }
@ -3201,7 +3088,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
        /*.max_initial_ts   =*/  1.0f,
        /*.length_penalty   =*/ -1.0f,

-        /*.temperature_inc  =*/  0.0f, // TODO: temporary disabled until improve performance
+        /*.temperature_inc  =*/  0.2f,
        /*.entropy_thold    =*/  2.4f,
        /*.logprob_thold    =*/ -1.0f,
        /*.no_speech_thold  =*/  0.6f,
--- a/whisper.h
+++ b/whisper.h
@ -248,19 +248,6 @@ extern "C" {
    WHISPER_API int whisper_n_audio_ctx     (struct whisper_context * ctx);
    WHISPER_API int whisper_is_multilingual (struct whisper_context * ctx);

-    WHISPER_API int whisper_model_n_vocab      (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_audio_ctx  (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_audio_state(struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_audio_head (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_audio_layer(struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_text_ctx   (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_text_state (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_text_head  (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_text_layer (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_n_mels       (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_f16          (struct whisper_context * ctx);
-    WHISPER_API int whisper_model_type         (struct whisper_context * ctx);
-
    // Token logits obtained from the last call to whisper_decode()
    // The logits for the last token are stored in the last row
    // Rows: n_tokens
@ -270,8 +257,6 @@ extern "C" {

    // Token Id -> String. Uses the vocabulary in the provided context
    WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
-    WHISPER_API const char * whisper_model_type_readable(struct whisper_context * ctx);
-

    // Special tokens
    WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);