Compare commits

..

5 Commits

19 changed files with 1959 additions and 882 deletions

2
.gitignore vendored
View File

@ -1,7 +1,5 @@
*.o
*.a
*.mlmodel
*.mlmodelc
.cache/
.vs/
.vscode/

View File

@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.0)
project(whisper.cpp VERSION 1.2.1)
project(whisper.cpp VERSION 1.2.0)
# Add path to modules
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
@ -54,8 +54,6 @@ if (APPLE)
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
else()
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
endif()
@ -88,33 +86,16 @@ endif()
find_package(Threads REQUIRED)
# on APPLE
if (APPLE)
# include Accelerate framework
if (NOT WHISPER_NO_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
# on APPLE - include Accelerate framework
if (APPLE AND NOT WHISPER_NO_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
message(STATUS "Accelerate framework found")
if (ACCELERATE_FRAMEWORK)
message(STATUS "Accelerate framework found")
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
else()
message(WARNING "Accelerate framework not found")
endif()
endif()
if (WHISPER_COREML)
find_library(FOUNDATION_FRAMEWORK Foundation)
find_library(COREML_FRAMEWORK CoreML)
if (COREML_FRAMEWORK)
message(STATUS "CoreML framework found")
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
else()
message(WARNING "CoreML framework not found")
endif()
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
else()
message(WARNING "Accelerate framework not found")
endif()
endif()
@ -200,33 +181,6 @@ if (WHISPER_PERF)
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
endif()
#
# whisper.coreml - Core ML support
#
if (WHISPER_COREML)
set(TARGET whisper.coreml)
add_library(${TARGET}
coreml/whisper-encoder.h
coreml/whisper-encoder.mm
coreml/whisper-encoder-impl.h
coreml/whisper-encoder-impl.m
)
include(DefaultTargetOptions)
target_include_directories(${TARGET} PUBLIC
.
)
target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
set_target_properties(${TARGET} PROPERTIES
COMPILE_FLAGS "-fobjc-arc"
)
endif()
#
# whisper - this is the main library of the project
#
@ -246,10 +200,6 @@ target_include_directories(${TARGET} PUBLIC
.
)
if (WHISPER_COREML)
target_link_libraries(${TARGET} PRIVATE whisper.coreml)
endif()
if (MSVC)
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})

View File

@ -30,8 +30,8 @@ endif
# Compile flags
#
CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
CFLAGS = -I. -O3 -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
LDFLAGS =
# OS specific
@ -132,10 +132,6 @@ ifndef WHISPER_NO_ACCELERATE
LDFLAGS += -framework Accelerate
endif
endif
ifdef WHISPER_COREML
CXXFLAGS += -DWHISPER_USE_COREML
LDFLAGS += -framework Foundation -framework CoreML
endif
ifdef WHISPER_OPENBLAS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas
@ -145,8 +141,6 @@ ifdef WHISPER_GPROF
CXXFLAGS += -pg
endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, 2, 3
@ -188,23 +182,11 @@ ggml.o: ggml.c ggml.h
whisper.o: whisper.cpp whisper.h
$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
ifndef WHISPER_COREML
WHISPER_OBJ = whisper.o
else
whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h
$(CXX) -O3 -I . -c coreml/whisper-encoder.mm -o whisper-encoder.o
libwhisper.a: ggml.o whisper.o
$(AR) rcs libwhisper.a ggml.o whisper.o
whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
WHISPER_OBJ = whisper.o whisper-encoder.o whisper-encoder-impl.o
endif
libwhisper.a: ggml.o $(WHISPER_OBJ)
$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
libwhisper.so: ggml.o $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
libwhisper.so: ggml.o whisper.o
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
clean:
rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
@ -218,21 +200,21 @@ CC_SDL=`sdl2-config --cflags --libs`
SRC_COMMON = examples/common.cpp
SRC_COMMON_SDL = examples/common-sdl.cpp
main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
./main -h
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
bench: examples/bench/bench.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
#
# Audio samples

View File

@ -4,7 +4,7 @@
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
Stable: [v1.2.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
@ -471,7 +471,6 @@ in [models](models).
- [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
- [X] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
## Examples

View File

@ -1,6 +1,6 @@
{
"name": "whisper.cpp",
"version": "1.2.1",
"version": "1.2.0",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {

View File

@ -1,142 +0,0 @@
//
// CoremlEncoder.h
//
// This file was automatically generated and should not be edited.
//
#import <Foundation/Foundation.h>
#import <CoreML/CoreML.h>
#include <stdint.h>
#include <os/log.h>
NS_ASSUME_NONNULL_BEGIN
/// Model Prediction Input Type
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoderInput : NSObject<MLFeatureProvider>
/// melSegment as 1 × 80 × 3000 3-dimensional array of floats
@property (readwrite, nonatomic, strong) MLMultiArray * melSegment;
- (instancetype)init NS_UNAVAILABLE;
- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment NS_DESIGNATED_INITIALIZER;
@end
/// Model Prediction Output Type
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoderOutput : NSObject<MLFeatureProvider>
/// output as multidimensional array of floats
@property (readwrite, nonatomic, strong) MLMultiArray * output;
- (instancetype)init NS_UNAVAILABLE;
- (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;
@end
/// Class for model loading and prediction
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoder : NSObject
@property (readonly, nonatomic, nullable) MLModel * model;
/**
URL of the underlying .mlmodelc directory.
*/
+ (nullable NSURL *)URLOfModelInThisBundle;
/**
Initialize CoremlEncoder instance from an existing MLModel object.
Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
*/
- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
/**
Initialize CoremlEncoder instance with the model in this bundle.
*/
- (nullable instancetype)init;
/**
Initialize CoremlEncoder instance with the model in this bundle.
@param configuration The model configuration object
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
*/
- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
/**
Initialize CoremlEncoder instance from the model URL.
@param modelURL URL to the .mlmodelc directory for CoremlEncoder.
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
*/
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
/**
Initialize CoremlEncoder instance from the model URL.
@param modelURL URL to the .mlmodelc directory for CoremlEncoder.
@param configuration The model configuration object
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
*/
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
/**
Construct CoremlEncoder instance asynchronously with configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
@param configuration The model configuration
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
*/
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
/**
Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
@param modelURL The model URL.
@param configuration The model configuration
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
*/
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
/**
Make a prediction using the standard interface
@param input an instance of CoremlEncoderInput to predict from
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
@return the prediction as CoremlEncoderOutput
*/
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
/**
Make a prediction using the standard interface
@param input an instance of CoremlEncoderInput to predict from
@param options prediction options
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
@return the prediction as CoremlEncoderOutput
*/
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
/**
Make a prediction using the convenience interface
@param melSegment as 1 × 80 × 3000 3-dimensional array of floats:
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
@return the prediction as CoremlEncoderOutput
*/
- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error;
/**
Batch prediction
@param inputArray array of CoremlEncoderInput instances to obtain predictions from
@param options prediction options
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
@return the predictions as NSArray<CoremlEncoderOutput *>
*/
- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
@end
NS_ASSUME_NONNULL_END

View File

@ -1,197 +0,0 @@
//
// CoremlEncoder.m
//
// This file was automatically generated and should not be edited.
//
#if !__has_feature(objc_arc)
#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
#endif
#import "whisper-encoder-impl.h"
@implementation CoremlEncoderInput
- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment {
self = [super init];
if (self) {
_melSegment = melSegment;
}
return self;
}
- (NSSet<NSString *> *)featureNames {
return [NSSet setWithArray:@[@"melSegment"]];
}
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
if ([featureName isEqualToString:@"melSegment"]) {
return [MLFeatureValue featureValueWithMultiArray:self.melSegment];
}
return nil;
}
@end
@implementation CoremlEncoderOutput
- (instancetype)initWithOutput:(MLMultiArray *)output {
self = [super init];
if (self) {
_output = output;
}
return self;
}
- (NSSet<NSString *> *)featureNames {
return [NSSet setWithArray:@[@"output"]];
}
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
if ([featureName isEqualToString:@"output"]) {
return [MLFeatureValue featureValueWithMultiArray:self.output];
}
return nil;
}
@end
@implementation CoremlEncoder
/**
URL of the underlying .mlmodelc directory.
*/
+ (nullable NSURL *)URLOfModelInThisBundle {
NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"CoremlEncoder" ofType:@"mlmodelc"];
if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load CoremlEncoder.mlmodelc in the bundle resource"); return nil; }
return [NSURL fileURLWithPath:assetPath];
}
/**
Initialize CoremlEncoder instance from an existing MLModel object.
Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
*/
- (instancetype)initWithMLModel:(MLModel *)model {
self = [super init];
if (!self) { return nil; }
_model = model;
if (_model == nil) { return nil; }
return self;
}
/**
Initialize CoremlEncoder instance with the model in this bundle.
*/
- (nullable instancetype)init {
return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
}
/**
Initialize CoremlEncoder instance with the model in this bundle.
@param configuration The model configuration object
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
*/
- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
}
/**
Initialize CoremlEncoder instance from the model URL.
@param modelURL URL to the .mlmodelc directory for CoremlEncoder.
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
*/
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
if (model == nil) { return nil; }
return [self initWithMLModel:model];
}
/**
Initialize CoremlEncoder instance from the model URL.
@param modelURL URL to the .mlmodelc directory for CoremlEncoder.
@param configuration The model configuration object
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
*/
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
if (model == nil) { return nil; }
return [self initWithMLModel:model];
}
/**
Construct CoremlEncoder instance asynchronously with configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
@param configuration The model configuration
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
*/
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
[self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
configuration:configuration
completionHandler:handler];
}
/**
Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
@param modelURL The model URL.
@param configuration The model configuration
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
*/
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
[MLModel loadContentsOfURL:modelURL
configuration:configuration
completionHandler:^(MLModel *model, NSError *error) {
if (model != nil) {
CoremlEncoder *typedModel = [[CoremlEncoder alloc] initWithMLModel:model];
handler(typedModel, nil);
} else {
handler(nil, error);
}
}];
}
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
}
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
if (!outFeatures) { return nil; }
return [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
}
- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
CoremlEncoderInput *input_ = [[CoremlEncoderInput alloc] initWithMelSegment:melSegment];
return [self predictionFromFeatures:input_ error:error];
}
- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
if (!outBatch) { return nil; }
NSMutableArray<CoremlEncoderOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
for (NSInteger i = 0; i < outBatch.count; i++) {
id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
CoremlEncoderOutput * result = [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
[results addObject:result];
}
return results;
}
@end

View File

@ -1,22 +0,0 @@
// Wrapper of the Core ML Whisper Encoder model
//
// Code is derived from the work of Github user @wangchou
// ref: https://github.com/wangchou/callCoreMLFromCpp
#if __cplusplus
extern "C" {
#endif
struct whisper_coreml_context;
struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
void whisper_coreml_free(struct whisper_coreml_context * ctx);
void whisper_coreml_encode(
const whisper_coreml_context * ctx,
float * mel,
float * out);
#if __cplusplus
}
#endif

View File

@ -1,61 +0,0 @@
#import "coreml/whisper-encoder.h"
#import "coreml/whisper-encoder-impl.h"
#import <CoreML/CoreML.h>
#include <stdlib.h>
#if __cplusplus
extern "C" {
#endif
struct whisper_coreml_context {
const void * data;
};
struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];
NSURL * url_model = [NSURL fileURLWithPath: path_model_str];
const void * data = CFBridgingRetain([[CoremlEncoder alloc] initWithContentsOfURL:url_model error:nil]);
if (data == NULL) {
return NULL;
}
whisper_coreml_context * ctx = new whisper_coreml_context;
ctx->data = data;
return ctx;
}
void whisper_coreml_free(struct whisper_coreml_context * ctx) {
CFRelease(ctx->data);
delete ctx;
}
void whisper_coreml_encode(
const whisper_coreml_context * ctx,
float * mel,
float * out) {
MLMultiArray * inMultiArray = [
[MLMultiArray alloc] initWithDataPointer: mel
shape: @[@1, @80, @3000]
dataType: MLMultiArrayDataTypeFloat32
strides: @[@(240000), @(3000), @1]
deallocator: nil
error: nil
];
CoremlEncoderOutput * outCoreML = [(__bridge id) ctx->data predictionFromMelSegment:inMultiArray error:nil];
MLMultiArray * outMA = outCoreML.output;
memcpy(out, outMA.dataPointer, outMA.count * sizeof(float));
}
#if __cplusplus
}
#endif

View File

@ -145,7 +145,15 @@ function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
var db = event.target.result;
var tx = db.transaction(['models'], 'readwrite');
var os = tx.objectStore('models');
var rq = os.put(data, url);
var rq = null;
try {
var rq = os.put(data, url);
} catch (e) {
cbPrint('loadRemote: failed to store "' + url + '" in the IndexedDB: \n' + e);
cbCancel();
return;
}
rq.onsuccess = function (event) {
cbPrint('loadRemote: "' + url + '" stored in the IndexedDB');
@ -180,7 +188,6 @@ function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
rq.onabort = function (event) {
cbPrint('loadRemote: failed to open IndexedDB: abort');
cbCancel();
};
}

View File

@ -352,14 +352,13 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
const int n_segments = whisper_full_n_segments(ctx);
fout << "start,end,text\n";
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
//need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
fout << 10 * t0 << "," << 10 * t1 << ",\"" << text << "\"\n";
fout << 10 * t0 << ", " << 10 * t1 << ", \"" << text << "\"\n";
}
return true;

View File

@ -31,9 +31,9 @@ endif()
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE=8 \
-s INITIAL_MEMORY=1500MB \
-s TOTAL_MEMORY=1500MB \
-s PTHREAD_POOL_SIZE_STRICT=0 \
-s INITIAL_MEMORY=2000MB \
-s TOTAL_MEMORY=2000MB \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
${EXTRA_FLAGS} \

View File

@ -10,6 +10,12 @@ std::thread g_worker;
std::vector<struct whisper_context *> g_contexts(4, nullptr);
static inline int mpow2(int n) {
int p = 1;
while (p <= n) p *= 2;
return p/2;
}
EMSCRIPTEN_BINDINGS(whisper) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
if (g_worker.joinable()) {
@ -43,7 +49,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
}
}));
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, int nthreads, bool translate) {
if (g_worker.joinable()) {
g_worker.join();
}
@ -66,7 +72,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
params.print_special = false;
params.translate = translate;
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency())));
params.offset_ms = 0;
std::vector<float> pcmf32;

View File

@ -40,21 +40,34 @@
Note that the computation is quite heavy and may take a few seconds to complete.<br>
The transcription results will be displayed in the text area below.<br><br>
<b>Important: your browser must support WASM SIMD instructions for this to work.</b>
<b>Important:</b>
<ul>
<li>your browser must support WASM SIMD instructions for this to work</li>
<li>quantized models are still in experimental stage (<a href="https://github.com/ggerganov/ggml/pull/27">more info</a>)</li>
<li>Firefox cannot load files larger than 256 MB - use Chrome instead</li>
</ul>
<br><br><hr>
<hr>
<div id="model">
Whisper model: <span id="model-whisper-status"></span>
Whisper models: <span id="model-whisper-status"></span><br><br>
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
<button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
<button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
<button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">small.en (466 MB)</button>
<button id="fetch-whisper-small" onclick="loadWhisper('small')">small (466 MB)</button>
<span id="fetch-whisper-progress"></span>
<input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
<br><br>
Quantized models:<br><br>
<button id="fetch-whisper-base-en-q4_0" onclick="loadWhisper('base-en-q4_0')">base.en (4bit, 49 MB)</button>
<button id="fetch-whisper-base-q4_0" onclick="loadWhisper('base-q4_0')">base (4bit, 49 MB)</button>
<button id="fetch-whisper-small-en-q4_0" onclick="loadWhisper('small-en-q4_0')">small.en (4bit, 152 MB)</button>
<button id="fetch-whisper-small-q4_0" onclick="loadWhisper('small-q4_0')">small (4bit, 152 MB)</button><br>
<button id="fetch-whisper-medium-en-q4_0" onclick="loadWhisper('medium-en-q4_0')">medium.en (4bit, 469 MB)</button>
<button id="fetch-whisper-medium-q4_0" onclick="loadWhisper('medium-q4_0')">medium (4bit, 469 MB)</button>
<button id="fetch-whisper-large-q4_0" onclick="loadWhisper('large-q4_0')">large (4bit, 985 MB)</button>
<span id="fetch-whisper-progress"></span>
</div>
<br>
@ -161,6 +174,12 @@
<option value="yi">Yiddish</option>
</select>
</td>
<!-- Slider to select number of threads between 1 and 16 -->
<td>
Threads:
<input type="range" id="threads" name="threads" min="1" max="16" value="8" onchange="changeThreads(this.value)" />
<span id="threads-value">8</span>
</td>
<td>
<button onclick="onProcess(false);">Transcribe</button>
</td>
@ -263,11 +282,13 @@
Module.FS_createDataFile("/", fname, buf, true, true);
model_whisper = fname;
//model_whisper = fname;
document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
document.getElementById('model').innerHTML = 'Model fetched: ' + model_whisper;
}
function loadFile(event, fname) {
@ -292,6 +313,15 @@
document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
document.getElementById('fetch-whisper-base' ).style.display = 'none';
document.getElementById('fetch-whisper-small' ).style.display = 'none';
document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none';
document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none';
document.getElementById('whisper-file' ).style.display = 'none';
document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
}
@ -304,6 +334,14 @@
'base': 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
'small.en': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en.bin',
'small': 'https://whisper.ggerganov.com/ggml-model-whisper-small.bin',
'base-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q4_0.bin',
'base-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base-q4_0.bin',
'small-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en-q4_0.bin',
'small-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small-q4_0.bin',
'medium-en-q4_0':'https://whisper.ggerganov.com/ggml-model-whisper-medium.en-q4_0.bin',
'medium-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-medium-q4_0.bin',
'large-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-large-q4_0.bin',
};
let sizes = {
@ -313,6 +351,14 @@
'base': 142,
'small.en': 466,
'small': 466,
'base-en-q4_0': 49,
'base-q4_0': 49,
'small-en-q4_0': 152,
'small-q4_0': 152,
'medium-en-q4_0': 469,
'medium-q4_0': 469,
'large-q4_0': 985,
};
let url = urls[model];
@ -327,6 +373,15 @@
document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
document.getElementById('fetch-whisper-base' ).style.display = 'none';
document.getElementById('fetch-whisper-small' ).style.display = 'none';
document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none';
document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none';
document.getElementById('whisper-file' ).style.display = 'none';
document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;
@ -337,12 +392,22 @@
cbCancel = function() {
var el;
el = document.getElementById('fetch-whisper-tiny-en' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-base-en' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-small-en'); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-tiny' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-base' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-small' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-base-en-q4_0' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-base-q4_0' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-small-en-q4_0' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-small-q4_0' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-medium-en-q4_0'); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-medium-q4_0' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('fetch-whisper-large-q4_0' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('whisper-file' ); if (el) el.style.display = 'inline-block';
el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = '';
};
@ -354,7 +419,8 @@
// audio file
//
const kMaxAudio_s = 120;
const kMaxAudio_s = 30*60;
const kMaxRecording_s = 2*60;
const kSampleRate = 16000;
window.AudioContext = window.AudioContext || window.webkitAudioContext;
@ -423,7 +489,7 @@
doRecording = false;
}
// record up to kMaxAudio_s seconds of audio from the microphone
// record up to kMaxRecording_s seconds of audio from the microphone
// check if doRecording is false every 1000 ms and stop recording if so
// update progress information
function startRecording() {
@ -479,9 +545,9 @@
printTextarea('js: audio recorded, size: ' + audio.length);
// truncate to first 30 seconds
if (audio.length > kMaxAudio_s*kSampleRate) {
audio = audio.slice(0, kMaxAudio_s*kSampleRate);
printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
if (audio.length > kMaxRecording_s*kSampleRate) {
audio = audio.slice(0, kMaxRecording_s*kSampleRate);
printTextarea('js: truncated audio to first ' + kMaxRecording_s + ' seconds');
}
setAudio(audio);
});
@ -509,24 +575,31 @@
});
}
document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%';
document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%';
document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxRecording_s) + '%';
document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxRecording_s).toFixed(0) + '%';
}, 1000);
printTextarea('js: recording ...');
setTimeout(function() {
if (doRecording) {
printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
printTextarea('js: recording stopped after ' + kMaxRecording_s + ' seconds');
stopRecording();
}
}, kMaxAudio_s*1000);
}, kMaxRecording_s*1000);
}
//
// transcribe
//
var nthreads = 8;
function changeThreads(value) {
nthreads = value;
document.getElementById('threads-value').innerHTML = nthreads;
}
function onProcess(translate) {
if (!instance) {
instance = Module.init('whisper.bin');
@ -553,7 +626,7 @@
printTextarea('');
setTimeout(function() {
var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
var ret = Module.full_default(instance, audio, document.getElementById('language').value, nthreads, translate);
console.log('js: full_default returned: ' + ret);
if (ret) {
printTextarea("js: whisper returned: " + ret);

1790
ggml.c

File diff suppressed because it is too large Load Diff

7
ggml.h
View File

@ -198,6 +198,8 @@ struct ggml_object;
struct ggml_context;
enum ggml_type {
GGML_TYPE_Q4_0,
GGML_TYPE_Q4_1,
GGML_TYPE_I8,
GGML_TYPE_I16,
GGML_TYPE_I32,
@ -326,7 +328,10 @@ void ggml_print_objects(const struct ggml_context * ctx);
int ggml_nelements(const struct ggml_tensor * tensor);
size_t ggml_nbytes (const struct ggml_tensor * tensor);
size_t ggml_type_size (enum ggml_type type);
int ggml_blck_size (enum ggml_type type);
size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
size_t ggml_element_size(const struct ggml_tensor * tensor);
struct ggml_context * ggml_init(struct ggml_init_params params);

View File

@ -1,82 +0,0 @@
#!/bin/bash
# This script downloads Whisper model files that have already been converted to Core ML format.
# This way you don't have to convert them yourself.
src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
pfx="resolve/main/ggml"
# get the path of this script
function get_script_path() {
if [ -x "$(command -v realpath)" ]; then
echo "$(dirname $(realpath $0))"
else
local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
echo "$ret"
fi
}
models_path="$(get_script_path)"
# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
# list available models
function list_models {
printf "\n"
printf " Available models:"
for model in "${models[@]}"; do
printf " $model"
done
printf "\n\n"
}
if [ "$#" -ne 1 ]; then
printf "Usage: $0 <model>\n"
list_models
exit 1
fi
model=$1
if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
printf "Invalid model: $model\n"
list_models
exit 1
fi
# download Core ML model
printf "Downloading Core ML model $model from '$src' ...\n"
cd $models_path
if [ -f "ggml-$model.mlmodel" ]; then
printf "Model $model already exists. Skipping download.\n"
exit 0
fi
if [ -x "$(command -v wget)" ]; then
wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
elif [ -x "$(command -v curl)" ]; then
curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
else
printf "Either wget or curl is required to download models.\n"
exit 1
fi
if [ $? -ne 0 ]; then
printf "Failed to download Core ML model $model \n"
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
exit 1
fi
printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
printf "Run the following command to compile it:\n\n"
printf " $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
printf "You can now use it like this:\n\n"
printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
printf "\n"

View File

@ -1,8 +1,5 @@
#define WHISPER_BUILD
#include "whisper.h"
#if WHISPER_USE_COREML
#include "coreml/whisper-encoder.h"
#endif
#include "ggml.h"
@ -255,12 +252,34 @@ static const std::map<e_model, size_t> MEM_REQ_SCRATCH3 = {
{ MODEL_LARGE, 9ull*MB },
};
static const std::map<e_model, size_t> MEM_REQ_MODEL = {
{ MODEL_TINY, 74ull*MB },
{ MODEL_BASE, 142ull*MB },
{ MODEL_SMALL, 466ull*MB },
{ MODEL_MEDIUM, 1464ull*MB },
{ MODEL_LARGE, 2952ull*MB },
static const std::map<ggml_type, std::map<e_model, size_t>> MEM_REQ_MODEL = {
{ GGML_TYPE_F16,
{
{ MODEL_TINY, 74ull*MB },
{ MODEL_BASE, 142ull*MB },
{ MODEL_SMALL, 466ull*MB },
{ MODEL_MEDIUM, 1464ull*MB },
{ MODEL_LARGE, 2952ull*MB },
},
},
{ GGML_TYPE_Q4_0,
{
{ MODEL_TINY, 26ull*MB },
{ MODEL_BASE, 50ull*MB },
{ MODEL_SMALL, 154ull*MB },
{ MODEL_MEDIUM, 470ull*MB },
{ MODEL_LARGE, 940ull*MB },
},
},
{ GGML_TYPE_Q4_1,
{
{ MODEL_TINY, 31ull*MB },
{ MODEL_BASE, 57ull*MB },
{ MODEL_SMALL, 181ull*MB },
{ MODEL_MEDIUM, 559ull*MB },
{ MODEL_LARGE, 1122ull*MB },
},
},
};
static const std::map<e_model, size_t> MEM_REQ_KV_SELF = {
@ -597,11 +616,6 @@ struct whisper_context {
int lang_id = 0; // english by default
std::string path_model; // populated by whisper_init_from_file()
#ifdef WHISPER_USE_COREML
whisper_coreml_context * ctx_coreml;
#endif
// [EXPERIMENTAL] token-level timestamps data
int64_t t_beg = 0;
int64_t t_last = 0;
@ -689,7 +703,7 @@ static bool kv_cache_reinit(struct whisper_kv_cache & cache) {
const ggml_type wtype = cache.k->type;
WHISPER_ASSERT(wtype == cache.v->type);
WHISPER_ASSERT(cache.buf.size() >= 2*n_elements*ggml_type_size(wtype));
WHISPER_ASSERT(cache.buf.size() >= 2*n_elements*ggml_type_sizef(wtype));
struct ggml_init_params params;
params.mem_size = cache.buf.size();
@ -784,12 +798,25 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
model.type = e_model::MODEL_LARGE;
}
// for the big tensors, we have the option to store the data in 16-bit floats
// for the big tensors, we have the option to store the data in 16-bit floats or quantized
// in order to save memory and also to speed up the computation
wctx.wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
wctx.wtype = GGML_TYPE_COUNT;
switch (model.hparams.f16) {
case 0: wctx.wtype = GGML_TYPE_F32; break;
case 1: wctx.wtype = GGML_TYPE_F16; break;
case 2: wctx.wtype = GGML_TYPE_Q4_0; break;
case 3: wctx.wtype = GGML_TYPE_Q4_1; break;
default:
{
fprintf(stderr, "%s: invalid model (bad f16 value %d)\n", __func__, model.hparams.f16);
return false;
}
}
const size_t scale = model.hparams.f16 ? 1 : 2;
static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab);
fprintf(stderr, "%s: n_audio_ctx = %d\n", __func__, hparams.n_audio_ctx);
fprintf(stderr, "%s: n_audio_state = %d\n", __func__, hparams.n_audio_state);
@ -800,7 +827,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
fprintf(stderr, "%s: n_text_head = %d\n", __func__, hparams.n_text_head);
fprintf(stderr, "%s: n_text_layer = %d\n", __func__, hparams.n_text_layer);
fprintf(stderr, "%s: n_mels = %d\n", __func__, hparams.n_mels);
fprintf(stderr, "%s: f16 = %d\n", __func__, hparams.f16);
fprintf(stderr, "%s: ftype = %s\n", __func__, ftype_str[model.hparams.f16]);
fprintf(stderr, "%s: type = %d\n", __func__, model.type);
// print memory requirements
@ -811,7 +838,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
MEM_REQ_SCRATCH1.at (model.type) +
MEM_REQ_SCRATCH2.at (model.type) +
MEM_REQ_SCRATCH3.at (model.type) +
scale*MEM_REQ_MODEL.at (model.type) +
scale*MEM_REQ_MODEL.at(wctx.wtype).at(model.type) +
scale*MEM_REQ_KV_CROSS.at(model.type) +
scale*std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type));
@ -827,9 +854,9 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
// always have at least one decoder
wctx.model.buf = new std::vector<uint8_t>();
wctx.model.buf->resize(scale*MEM_REQ_MODEL.at(model.type));
wctx.model.buf->resize(scale*MEM_REQ_MODEL.at(wctx.wtype).at(model.type));
if (!kv_cache_init(model.hparams, scale*MEM_REQ_KV_SELF.at(model.type), wctx.decoders[0].kv_self, wctx.wtype, model.hparams.n_text_ctx)) {
if (!kv_cache_init(model.hparams, scale*MEM_REQ_KV_SELF.at(model.type), wctx.decoders[0].kv_self, GGML_TYPE_F16, model.hparams.n_text_ctx)) {
fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
return false;
}
@ -839,7 +866,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
fprintf(stderr, "%s: kv self size = %7.2f MB\n", __func__, memory_size/1024.0/1024.0);
}
if (!kv_cache_init(model.hparams, scale*MEM_REQ_KV_CROSS.at(model.type), wctx.kv_cross, wctx.wtype, model.hparams.n_audio_ctx)) {
if (!kv_cache_init(model.hparams, scale*MEM_REQ_KV_CROSS.at(model.type), wctx.kv_cross, GGML_TYPE_F16, model.hparams.n_audio_ctx)) {
fprintf(stderr, "%s: kv_cache_init() failed for cross-attention cache\n", __func__);
return false;
}
@ -971,92 +998,92 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
// encoder
{
ctx_size += n_audio_ctx*n_audio_state*ggml_type_size(GGML_TYPE_F32); // e_pe;
ctx_size += n_audio_ctx*n_audio_state*ggml_type_sizef(GGML_TYPE_F32); // e_pe;
ctx_size += 3*n_mels*n_audio_state*ggml_type_size(wtype); // e_conv_1_w
ctx_size += n_audio_state*ggml_type_size(GGML_TYPE_F32); // e_conv_1_b
ctx_size += 3*n_mels*n_audio_state*ggml_type_sizef(GGML_TYPE_F16); // e_conv_1_w
ctx_size += n_audio_state*ggml_type_sizef(GGML_TYPE_F32); // e_conv_1_b
ctx_size += 3*n_audio_state*n_audio_state*ggml_type_size(wtype); // e_conv_2_w
ctx_size += n_audio_state*ggml_type_size(GGML_TYPE_F32); // e_conv_2_b
ctx_size += 3*n_audio_state*n_audio_state*ggml_type_sizef(GGML_TYPE_F16); // e_conv_2_w
ctx_size += n_audio_state*ggml_type_sizef(GGML_TYPE_F32); // e_conv_2_b
ctx_size += n_audio_state*ggml_type_size(GGML_TYPE_F32); // e_ln_w;
ctx_size += n_audio_state*ggml_type_size(GGML_TYPE_F32); // e_ln_b;
ctx_size += n_audio_state*ggml_type_sizef(GGML_TYPE_F32); // e_ln_w;
ctx_size += n_audio_state*ggml_type_sizef(GGML_TYPE_F32); // e_ln_b;
}
// decoder
{
ctx_size += n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F32); // d_pe;
ctx_size += n_text_ctx*n_text_state*ggml_type_sizef(GGML_TYPE_F32); // d_pe;
ctx_size += n_vocab*n_text_state*ggml_type_size(wtype); // d_te;
ctx_size += n_vocab*n_text_state*ggml_type_sizef(wtype); // d_te;
ctx_size += n_text_state*ggml_type_size(GGML_TYPE_F32); // d_ln_w;
ctx_size += n_text_state*ggml_type_size(GGML_TYPE_F32); // d_ln_b;
ctx_size += n_text_state*ggml_type_sizef(GGML_TYPE_F32); // d_ln_w;
ctx_size += n_text_state*ggml_type_sizef(GGML_TYPE_F32); // d_ln_b;
}
// encoder layers
{
ctx_size += n_audio_layer*(n_audio_state*ggml_type_size(GGML_TYPE_F32)); // mlp_ln_w
ctx_size += n_audio_layer*(n_audio_state*ggml_type_size(GGML_TYPE_F32)); // mlp_ln_b
ctx_size += n_audio_layer*(n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_ln_w
ctx_size += n_audio_layer*(n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_ln_b
ctx_size += n_audio_layer*(4*n_audio_state*n_audio_state*ggml_type_size(wtype)); // mlp_0_w
ctx_size += n_audio_layer*( 4*n_audio_state*ggml_type_size(GGML_TYPE_F32)); // mlp_0_b
ctx_size += n_audio_layer*(4*n_audio_state*n_audio_state*ggml_type_sizef(wtype)); // mlp_0_w
ctx_size += n_audio_layer*( 4*n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_0_b
ctx_size += n_audio_layer*(4*n_audio_state*n_audio_state*ggml_type_size(wtype)); // mlp_1_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_size(GGML_TYPE_F32)); // mlp_1_b
ctx_size += n_audio_layer*(4*n_audio_state*n_audio_state*ggml_type_sizef(wtype)); // mlp_1_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_1_b
ctx_size += n_audio_layer*(n_audio_state*ggml_type_size(GGML_TYPE_F32)); // attn_ln_0_w
ctx_size += n_audio_layer*(n_audio_state*ggml_type_size(GGML_TYPE_F32)); // attn_ln_0_b
ctx_size += n_audio_layer*(n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_ln_0_w
ctx_size += n_audio_layer*(n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_ln_0_b
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_size(wtype)); // attn_q_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_size(GGML_TYPE_F32)); // attn_q_b
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_sizef(wtype)); // attn_q_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_q_b
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_size(wtype)); // attn_k_w
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_sizef(wtype)); // attn_k_w
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_size(wtype)); // attn_v_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_size(GGML_TYPE_F32)); // attn_v_b
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_sizef(wtype)); // attn_v_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_v_b
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_size(wtype)); // attn_ln_1_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_size(GGML_TYPE_F32)); // attn_ln_1_b
ctx_size += n_audio_layer*(n_audio_state*n_audio_state*ggml_type_sizef(wtype)); // attn_ln_1_w
ctx_size += n_audio_layer*( n_audio_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_ln_1_b
}
// decoder layers
{
ctx_size += n_text_layer*(n_text_state*ggml_type_size(GGML_TYPE_F32)); // mlp_ln_w
ctx_size += n_text_layer*(n_text_state*ggml_type_size(GGML_TYPE_F32)); // mlp_ln_b
ctx_size += n_text_layer*(n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_ln_w
ctx_size += n_text_layer*(n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_ln_b
ctx_size += n_text_layer*(4*n_text_state*n_text_state*ggml_type_size(wtype)); // mlp_0_w
ctx_size += n_text_layer*( 4*n_text_state*ggml_type_size(GGML_TYPE_F32)); // mlp_0_b
ctx_size += n_text_layer*(4*n_text_state*n_text_state*ggml_type_sizef(wtype)); // mlp_0_w
ctx_size += n_text_layer*( 4*n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_0_b
ctx_size += n_text_layer*(4*n_text_state*n_text_state*ggml_type_size(wtype)); // mlp_1_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // mlp_1_b
ctx_size += n_text_layer*(4*n_text_state*n_text_state*ggml_type_sizef(wtype)); // mlp_1_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // mlp_1_b
ctx_size += n_text_layer*(n_text_state*ggml_type_size(GGML_TYPE_F32)); // attn_ln_0_w
ctx_size += n_text_layer*(n_text_state*ggml_type_size(GGML_TYPE_F32)); // attn_ln_0_b
ctx_size += n_text_layer*(n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_ln_0_w
ctx_size += n_text_layer*(n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_ln_0_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // attn_q_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // attn_q_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // attn_q_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_q_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // attn_k_w
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // attn_k_w
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // attn_v_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // attn_v_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // attn_v_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_v_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // attn_ln_1_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // attn_ln_1_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // attn_ln_1_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // attn_ln_1_b
//
ctx_size += n_text_layer*(n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_0_w
ctx_size += n_text_layer*(n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_0_b
ctx_size += n_text_layer*(n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // cross_attn_ln_0_w
ctx_size += n_text_layer*(n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // cross_attn_ln_0_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // cross_attn_q_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_q_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // cross_attn_q_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // cross_attn_q_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // cross_attn_k_w
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // cross_attn_k_w
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // cross_attn_v_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_v_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // cross_attn_v_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // cross_attn_v_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_size(wtype)); // cross_attn_ln_1_w
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
ctx_size += n_text_layer*(n_text_state*n_text_state*ggml_type_sizef(wtype)); // cross_attn_ln_1_w
ctx_size += n_text_layer*( n_text_state*ggml_type_sizef(GGML_TYPE_F32)); // cross_attn_ln_1_b
}
ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
@ -1102,10 +1129,10 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
{
model.e_pe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_audio_state, n_audio_ctx);
model.e_conv_1_w = ggml_new_tensor_3d(ctx, wtype, 3, n_mels, n_audio_state);
model.e_conv_1_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 3, n_mels, n_audio_state);
model.e_conv_1_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, n_audio_state);
model.e_conv_2_w = ggml_new_tensor_3d(ctx, wtype, 3, n_audio_state, n_audio_state);
model.e_conv_2_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 3, n_audio_state, n_audio_state);
model.e_conv_2_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, n_audio_state);
model.e_ln_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
@ -1321,9 +1348,21 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
return false;
}
const size_t bpe = (ftype == 0) ? sizeof(float) : sizeof(ggml_fp16_t);
size_t bpe = 0;
if (nelements*bpe != ggml_nbytes(tensor)) {
switch (ftype) {
case 0: bpe = ggml_type_size(GGML_TYPE_F32); break;
case 1: bpe = ggml_type_size(GGML_TYPE_F16); break;
case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
default:
{
fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype);
return false;
}
};
if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
return false;
@ -1405,7 +1444,6 @@ static bool whisper_encode(
}
}
#ifndef WHISPER_USE_COREML
struct ggml_tensor * cur;
// convolution + gelu
@ -1522,14 +1560,14 @@ static bool whisper_encode(
ggml_permute(ctx0,
ggml_cpy(ctx0,
Qcur,
ggml_new_tensor_3d(ctx0, wctx.wtype, n_state/n_head, n_head, n_ctx)),
ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
0, 2, 1, 3);
struct ggml_tensor * K =
ggml_permute(ctx0,
ggml_cpy(ctx0,
Kcur,
ggml_new_tensor_3d(ctx0, wctx.wtype, n_state/n_head, n_head, n_ctx)),
ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
0, 2, 1, 3);
struct ggml_tensor * V =
@ -1539,7 +1577,7 @@ static bool whisper_encode(
Vcur,
n_state/n_head, n_head, n_ctx),
1, 2, 0, 3),
ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head)
ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_ctx, n_state/n_head, n_head)
);
struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
@ -1555,7 +1593,7 @@ static bool whisper_encode(
ggml_permute(ctx0,
ggml_cpy(ctx0,
Kcur,
ggml_new_tensor_3d(ctx0, wctx.wtype, n_state/n_head, n_head, n_ctx)),
ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
0, 2, 1, 3);
// K * Q
@ -1573,7 +1611,7 @@ static bool whisper_encode(
// ggml_permute(ctx0,
// ggml_cpy(ctx0,
// Vcur,
// ggml_new_tensor_3d(ctx0, wctx.wtype, n_state/n_head, n_head, n_ctx)),
// ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
// 1, 2, 0, 3);
//struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max);
@ -1585,7 +1623,7 @@ static bool whisper_encode(
Vcur,
n_state/n_head, n_head, n_ctx),
0, 2, 1, 3),
ggml_new_tensor_3d(ctx0, wctx.wtype, n_state/n_head, n_ctx, n_head)
ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_state/n_head, n_ctx, n_head)
);
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
@ -1643,7 +1681,7 @@ static bool whisper_encode(
wctx.use_buf(ctx0, 0);
cur = ggml_flash_ff(ctx0,
ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, n_ctx)),
ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, GGML_TYPE_F16, n_state, n_ctx)),
layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
#else
wctx.use_buf(ctx0, 0);
@ -1714,13 +1752,6 @@ static bool whisper_encode(
//ggml_graph_print(&gf);
}
#else
wctx.use_buf(ctx0, -1);
struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data);
#endif
// cur
//{
@ -2523,20 +2554,6 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
// interface implementation
//
#ifdef WHISPER_USE_COREML
// replace .bin with .mlmodelc
static std::string whisper_get_coreml_path(std::string path_bin) {
auto pos = path_bin.rfind('.');
if (pos != std::string::npos) {
path_bin = path_bin.substr(0, pos);
}
path_bin += ".mlmodelc";
return path_bin;
}
#endif
struct whisper_context * whisper_init_from_file(const char * path_model) {
whisper_model_loader loader = {};
@ -2549,7 +2566,6 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
}
loader.context = &fin;
loader.read = [](void * ctx, void * output, size_t read_size) {
std::ifstream * fin = (std::ifstream*)ctx;
fin->read((char *)output, read_size);
@ -2566,26 +2582,7 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
fin->close();
};
auto ctx = whisper_init(&loader);
if (ctx) {
ctx->path_model = path_model;
#ifdef WHISPER_USE_COREML
const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
if (!ctx->ctx_coreml) {
fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
return nullptr;
}
fprintf(stderr, "%s: Core ML model loaded\n", __func__);
#endif
}
return ctx;
return whisper_init(&loader);
}
struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
@ -2657,10 +2654,6 @@ void whisper_free(struct whisper_context * ctx) {
ggml_free(ctx->decoders[i].kv_self.ctx);
}
}
#ifdef WHISPER_USE_COREML
whisper_coreml_free(ctx->ctx_coreml);
ctx->ctx_coreml = nullptr;
#endif
delete ctx;
}
}
@ -4499,23 +4492,32 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
// when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
std::vector<char> buf(4llu*N_max*N_max*sizeof(float) + 4*256);
// put a bunch of random data in the buffer
for (size_t i = 0; i < buf.size(); i++) buf[i] = i;
for (int j = 0; j < (int) sizes.size(); j++) {
int n_q4_0 = 0;
int n_q4_1 = 0;
int n_fp16 = 0;
int n_fp32 = 0;
// GFLOPS/s
double s_q4_0 = 0.0;
double s_q4_1 = 0.0;
double s_fp16 = 0.0;
double s_fp32 = 0.0;
const size_t N = sizes[j];
for (int k = 0; k < 2; ++k) {
const ggml_type wtype = k == 0 ? GGML_TYPE_F16 : GGML_TYPE_F32;
for (int k = 0; k < 4; ++k) {
const ggml_type wtype =
k == 0 ? GGML_TYPE_Q4_0 :
k == 1 ? GGML_TYPE_Q4_1 :
k == 2 ? GGML_TYPE_F16 :
GGML_TYPE_F32;
double & s = k == 0 ? s_fp16 : s_fp32;
int & n = k == 0 ? n_fp16 : n_fp32;
double & s = k == 0 ? s_q4_0 : k == 1 ? s_q4_1 : k == 2 ? s_fp16 : s_fp32;
int & n = k == 0 ? n_q4_0 : k == 1 ? n_q4_1 : k == 2 ? n_fp16 : n_fp32;
struct ggml_init_params gparams = {
/*.mem_size =*/ buf.size(),
@ -4558,8 +4560,8 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
s = ((2.0*N*N*N*n)/tsum)*1e-9;
}
fprintf(stderr, "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
N, N, s_fp16, n_fp16, s_fp32, n_fp32);
fprintf(stderr, "ggml_mul_mat: %4zu x %4zu: Q4_0 %7.1f GFLOPS (%3d runs) / Q4_1 %7.1f GFLOPS (%3d runs) / F16 %7.1f GFLOPS (%3d runs) / F32 %7.1f GFLOPS (%3d runs)\n",
N, N, s_q4_0, n_q4_0, s_q4_1, n_q4_1, s_fp16, n_fp16, s_fp32, n_fp32);
}
return 0;