forked from extern/whisper.cpp
coreml : use Core ML encoder inference
This commit is contained in:
parent
72af0f5697
commit
b0ac915265
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,5 +1,7 @@
|
||||
*.o
|
||||
*.a
|
||||
*.mlmodel
|
||||
*.mlmodelc
|
||||
.cache/
|
||||
.vs/
|
||||
.vscode/
|
||||
|
@ -54,6 +54,8 @@ if (APPLE)
|
||||
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
||||
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
||||
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
|
||||
|
||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||
else()
|
||||
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
|
||||
endif()
|
||||
@ -86,16 +88,33 @@ endif()
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
# on APPLE - include Accelerate framework
|
||||
if (APPLE AND NOT WHISPER_NO_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||
if (ACCELERATE_FRAMEWORK)
|
||||
message(STATUS "Accelerate framework found")
|
||||
# on APPLE
|
||||
if (APPLE)
|
||||
# include Accelerate framework
|
||||
if (NOT WHISPER_NO_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
|
||||
else()
|
||||
message(WARNING "Accelerate framework not found")
|
||||
if (ACCELERATE_FRAMEWORK)
|
||||
message(STATUS "Accelerate framework found")
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
|
||||
else()
|
||||
message(WARNING "Accelerate framework not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Core ML support (opt-in via -DWHISPER_COREML=ON)
if (WHISPER_COREML)
    # The whisper.coreml target links against both frameworks, so a missing
    # one cannot be tolerated: stop here with a clear message instead of
    # failing later on a *-NOTFOUND link item.
    find_library(FOUNDATION_FRAMEWORK Foundation)
    find_library(COREML_FRAMEWORK CoreML)

    if (NOT FOUNDATION_FRAMEWORK)
        message(FATAL_ERROR "Foundation framework not found (required by WHISPER_COREML)")
    endif()

    if (NOT COREML_FRAMEWORK)
        message(FATAL_ERROR "CoreML framework not found (required by WHISPER_COREML)")
    endif()

    message(STATUS "CoreML framework found")

    # enables the Core ML code paths guarded by WHISPER_USE_COREML
    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
endif()
|
||||
endif()
|
||||
|
||||
@ -181,6 +200,33 @@ if (WHISPER_PERF)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
|
||||
endif()
|
||||
|
||||
#
|
||||
# whisper.coreml - Core ML support
|
||||
#
|
||||
|
||||
if (WHISPER_COREML)
    set(TARGET whisper.coreml)

    # Objective-C / Objective-C++ sources wrapping the Core ML encoder model
    add_library(${TARGET}
        coreml/whisper-encoder.h
        coreml/whisper-encoder.mm
        coreml/whisper-encoder-impl.h
        coreml/whisper-encoder-impl.m
        )

    include(DefaultTargetOptions)

    target_include_directories(${TARGET} PUBLIC
        .
        )

    target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})

    # whisper-encoder-impl.m refuses to build without ARC (it has an #error guard).
    # target_compile_options() replaces the deprecated COMPILE_FLAGS property.
    target_compile_options(${TARGET} PRIVATE -fobjc-arc)
endif()
|
||||
|
||||
#
|
||||
# whisper - this is the main library of the project
|
||||
#
|
||||
@ -200,6 +246,10 @@ target_include_directories(${TARGET} PUBLIC
|
||||
.
|
||||
)
|
||||
|
||||
if (WHISPER_COREML)
|
||||
target_link_libraries(${TARGET} PRIVATE whisper.coreml)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
||||
|
44
Makefile
44
Makefile
@ -132,6 +132,10 @@ ifndef WHISPER_NO_ACCELERATE
|
||||
LDFLAGS += -framework Accelerate
|
||||
endif
|
||||
endif
|
||||
# Core ML encoder (e.g. `make WHISPER_COREML=1`):
# link the Apple frameworks and switch on the WHISPER_USE_COREML code paths
ifdef WHISPER_COREML
  LDFLAGS  += -framework Foundation -framework CoreML
  CXXFLAGS += -DWHISPER_USE_COREML
endif
|
||||
ifdef WHISPER_OPENBLAS
|
||||
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
||||
LDFLAGS += -lopenblas
|
||||
@ -184,11 +188,23 @@ ggml.o: ggml.c ggml.h
|
||||
whisper.o: whisper.cpp whisper.h
|
||||
$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
|
||||
|
||||
libwhisper.a: ggml.o whisper.o
|
||||
$(AR) rcs libwhisper.a ggml.o whisper.o
|
||||
ifndef WHISPER_COREML
|
||||
WHISPER_OBJ = whisper.o
|
||||
else
|
||||
# Objective-C++ wrapper exposing the C API declared in coreml/whisper-encoder.h.
# Built with ARC (as in the CMake build) so the Objective-C objects created by the
# wrapper are released automatically; it also imports whisper-encoder-impl.h,
# hence the extra prerequisite.
whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h coreml/whisper-encoder-impl.h
	$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder.mm -o whisper-encoder.o
|
||||
|
||||
libwhisper.so: ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
|
||||
# Generated Core ML model class. This is plain Objective-C (.m), so it is built
# with the C compiler driver rather than $(CXX); ARC is mandatory for this file.
whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
	$(CC) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
|
||||
|
||||
WHISPER_OBJ = whisper.o whisper-encoder.o whisper-encoder-impl.o
|
||||
endif
|
||||
|
||||
libwhisper.a: ggml.o $(WHISPER_OBJ)
|
||||
$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
|
||||
|
||||
libwhisper.so: ggml.o $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
|
||||
|
||||
clean:
|
||||
rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
|
||||
@ -202,21 +218,21 @@ CC_SDL=`sdl2-config --cflags --libs`
|
||||
SRC_COMMON = examples/common.cpp
|
||||
SRC_COMMON_SDL = examples/common-sdl.cpp
|
||||
|
||||
main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
|
||||
main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
|
||||
./main -h
|
||||
|
||||
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
|
||||
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
|
||||
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
|
||||
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
bench: examples/bench/bench.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
|
||||
bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
|
||||
|
||||
#
|
||||
# Audio samples
|
||||
|
142
coreml/whisper-encoder-impl.h
Normal file
142
coreml/whisper-encoder-impl.h
Normal file
@ -0,0 +1,142 @@
|
||||
//
|
||||
// CoremlEncoder.h
|
||||
//
|
||||
// This file was automatically generated and should not be edited.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
#import <CoreML/CoreML.h>
|
||||
#include <stdint.h>
|
||||
#include <os/log.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
|
||||
/// Input feature provider for CoremlEncoder predictions
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoderInput : NSObject<MLFeatureProvider>

/// The "melSegment" feature: a 1 × 80 × 3000 3-dimensional array of floats
@property (readwrite, nonatomic, strong) MLMultiArray * melSegment;

/// Not available; use -initWithMelSegment:
- (instancetype)init NS_UNAVAILABLE;

/// Designated initializer; stores melSegment as the only input feature
- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment NS_DESIGNATED_INITIALIZER;

@end
|
||||
|
||||
|
||||
/// Output feature provider returned by CoremlEncoder predictions
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoderOutput : NSObject<MLFeatureProvider>

/// The "output" feature: a multidimensional array of floats
@property (readwrite, nonatomic, strong) MLMultiArray * output;

/// Not available; use -initWithOutput:
- (instancetype)init NS_UNAVAILABLE;

/// Designated initializer; stores output as the only output feature
- (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;

@end
|
||||
|
||||
|
||||
/// Typed wrapper around an MLModel: model loading and prediction
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoder : NSObject

/// The wrapped Core ML model
@property (readonly, nonatomic, nullable) MLModel * model;

/**
    URL of the CoremlEncoder.mlmodelc directory inside the bundle of this class,
    or nil when the bundle does not contain it.
*/
+ (nullable NSURL *)URLOfModelInThisBundle;

/**
    Designated initializer: wrap an already loaded MLModel.

    Mostly useful for subclasses, which can create the MLModel themselves with
    `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle`.
*/
- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;

/**
    Load the model shipped in this bundle.
*/
- (nullable instancetype)init;

/**
    Load the model shipped in this bundle with a custom configuration.

    @param configuration The model configuration object
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
*/
- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;

/**
    Load the model from an explicit location.

    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
*/
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;

/**
    Load the model from an explicit location with a custom configuration.

    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
    @param configuration The model configuration object
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
*/
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;

/**
    Asynchronously load the model shipped in this bundle.

    Loading can take a while when the model content is not immediately available
    (e.g. an encrypted model), so prefer this factory when calling from the main thread.

    @param configuration The model configuration
    @param handler Invoked when loading finishes, with either a valid CoremlEncoder instance or an NSError.
*/
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));

/**
    Asynchronously load the model from the URL of a .mlmodelc directory.

    Loading can take a while when the model content is not immediately available
    (e.g. an encrypted model), so prefer this factory when calling from the main thread.

    @param modelURL The model URL.
    @param configuration The model configuration
    @param handler Invoked when loading finishes, with either a valid CoremlEncoder instance or an NSError.
*/
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));

/**
    Run a prediction through the standard interface.

    @param input an instance of CoremlEncoderInput to predict from
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
    @return the prediction as CoremlEncoderOutput
*/
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;

/**
    Run a prediction through the standard interface with explicit options.

    @param input an instance of CoremlEncoderInput to predict from
    @param options prediction options
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
    @return the prediction as CoremlEncoderOutput
*/
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;

/**
    Run a prediction through the convenience interface.

    @param melSegment a 1 × 80 × 3000 3-dimensional array of floats
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
    @return the prediction as CoremlEncoderOutput
*/
- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error;

/**
    Batch prediction.

    @param inputArray array of CoremlEncoderInput instances to obtain predictions from
    @param options prediction options
    @param error On failure, receives an NSError describing the problem. Pass NULL to ignore errors.
    @return the predictions as NSArray<CoremlEncoderOutput *>
*/
- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
197
coreml/whisper-encoder-impl.m
Normal file
197
coreml/whisper-encoder-impl.m
Normal file
@ -0,0 +1,197 @@
|
||||
//
|
||||
// CoremlEncoder.m
|
||||
//
|
||||
// This file was automatically generated and should not be edited.
|
||||
//
|
||||
|
||||
#if !__has_feature(objc_arc)
|
||||
#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
|
||||
#endif
|
||||
|
||||
#import "whisper-encoder-impl.h"
|
||||
|
||||
@implementation CoremlEncoderInput

// Keep the mel segment as the one and only input feature
- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment {
    if ((self = [super init])) {
        _melSegment = melSegment;
    }
    return self;
}

// MLFeatureProvider: this provider exposes exactly one feature, "melSegment"
- (NSSet<NSString *> *)featureNames {
    return [NSSet setWithObject:@"melSegment"];
}

// MLFeatureProvider: wrap the stored array for "melSegment"; any other name yields nil
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
    return [featureName isEqualToString:@"melSegment"]
        ? [MLFeatureValue featureValueWithMultiArray:self.melSegment]
        : nil;
}

@end
|
||||
|
||||
@implementation CoremlEncoderOutput

// Keep the model result as the one and only output feature
- (instancetype)initWithOutput:(MLMultiArray *)output {
    if ((self = [super init])) {
        _output = output;
    }
    return self;
}

// MLFeatureProvider: this provider exposes exactly one feature, "output"
- (NSSet<NSString *> *)featureNames {
    return [NSSet setWithObject:@"output"];
}

// MLFeatureProvider: wrap the stored array for "output"; any other name yields nil
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
    return [featureName isEqualToString:@"output"]
        ? [MLFeatureValue featureValueWithMultiArray:self.output]
        : nil;
}

@end
|
||||
|
||||
@implementation CoremlEncoder

// Look up CoremlEncoder.mlmodelc in the bundle that contains this class.
// Logs an error and returns nil when the resource is missing.
+ (nullable NSURL *)URLOfModelInThisBundle {
    NSBundle *bundle = [NSBundle bundleForClass:[self class]];
    NSString *assetPath = [bundle pathForResource:@"CoremlEncoder" ofType:@"mlmodelc"];
    if (assetPath != nil) {
        return [NSURL fileURLWithPath:assetPath];
    }
    os_log_error(OS_LOG_DEFAULT, "Could not load CoremlEncoder.mlmodelc in the bundle resource");
    return nil;
}

// Designated initializer: wrap an existing MLModel; a nil model yields nil
- (instancetype)initWithMLModel:(MLModel *)model {
    self = [super init];
    if (self == nil) {
        return nil;
    }
    _model = model;
    return (_model != nil) ? self : nil;
}

// Load the bundled model, ignoring any loading error
- (nullable instancetype)init {
    NSURL *bundledURL = self.class.URLOfModelInThisBundle;
    return [self initWithContentsOfURL:(NSURL * _Nonnull)bundledURL error:nil];
}

// Load the bundled model with the given configuration
- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    NSURL *bundledURL = self.class.URLOfModelInThisBundle;
    return [self initWithContentsOfURL:(NSURL * _Nonnull)bundledURL configuration:configuration error:error];
}

// Load the model stored in the .mlmodelc directory at modelURL
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    MLModel *loaded = [MLModel modelWithContentsOfURL:modelURL error:error];
    if (loaded != nil) {
        return [self initWithMLModel:loaded];
    }
    return nil;
}

// Load the model stored in the .mlmodelc directory at modelURL with the given configuration
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    MLModel *loaded = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
    if (loaded != nil) {
        return [self initWithMLModel:loaded];
    }
    return nil;
}

// Asynchronous variant of -initWithConfiguration:error: for the bundled model
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
    NSURL *bundledURL = [self URLOfModelInThisBundle];
    [self loadContentsOfURL:(NSURL * _Nonnull)bundledURL
              configuration:configuration
          completionHandler:handler];
}

// Asynchronous load from modelURL; the handler receives either the typed wrapper or the error
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
    [MLModel loadContentsOfURL:modelURL
                 configuration:configuration
             completionHandler:^(MLModel *model, NSError *error) {
        if (model == nil) {
            handler(nil, error);
            return;
        }
        handler([[CoremlEncoder alloc] initWithMLModel:model], nil);
    }];
}

// Prediction with default options
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    MLPredictionOptions *defaultOptions = [[MLPredictionOptions alloc] init];
    return [self predictionFromFeatures:input options:defaultOptions error:error];
}

// Run the model and repackage its "output" feature as a CoremlEncoderOutput
- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
    if (outFeatures == nil) {
        return nil;
    }
    MLMultiArray *outputArray = (MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue;
    return [[CoremlEncoderOutput alloc] initWithOutput:outputArray];
}

// Convenience: wrap melSegment in a CoremlEncoderInput and predict with default options
- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    CoremlEncoderInput *wrappedInput = [[CoremlEncoderInput alloc] initWithMelSegment:melSegment];
    return [self predictionFromFeatures:wrappedInput error:error];
}

// Batch prediction: one CoremlEncoderOutput per element of inputArray
- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
    id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
    if (outBatch == nil) {
        return nil;
    }

    NSMutableArray<CoremlEncoderOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
    for (NSInteger idx = 0; idx < outBatch.count; idx++) {
        id<MLFeatureProvider> provider = [outBatch featuresAtIndex:idx];
        MLMultiArray *outputArray = (MLMultiArray *)[provider featureValueForName:@"output"].multiArrayValue;
        [results addObject:[[CoremlEncoderOutput alloc] initWithOutput:outputArray]];
    }
    return results;
}

@end
|
22
coreml/whisper-encoder.h
Normal file
22
coreml/whisper-encoder.h
Normal file
@ -0,0 +1,22 @@
|
||||
// Wrapper of the Core ML Whisper Encoder model
|
||||
//
|
||||
// Code is derived from the work of GitHub user @wangchou
|
||||
// ref: https://github.com/wangchou/callCoreMLFromCpp
|
||||
|
||||
#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to a loaded Core ML encoder model
struct whisper_coreml_context;

// Load the compiled Core ML model located at path_model.
// Returns NULL if the model could not be loaded.
struct whisper_coreml_context * whisper_coreml_init(const char * path_model);

// Release a context obtained from whisper_coreml_init()
void whisper_coreml_free(struct whisper_coreml_context * ctx);

// Run the encoder on one mel segment.
//   mel - input, read as a 1 x 80 x 3000 array of 32-bit floats
//   out - receives the encoder output; the caller must provide a buffer
//         large enough for every element of the model output
void whisper_coreml_encode(
        const struct whisper_coreml_context * ctx,
                                      float * mel,
                                      float * out);

#ifdef __cplusplus
}
#endif
|
61
coreml/whisper-encoder.mm
Normal file
61
coreml/whisper-encoder.mm
Normal file
@ -0,0 +1,61 @@
|
||||
#import "coreml/whisper-encoder.h"
|
||||
#import "coreml/whisper-encoder-impl.h"
|
||||
|
||||
#import <CoreML/CoreML.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// State behind the opaque handle of the C API
struct whisper_coreml_context {
    // retained CoremlEncoder instance: stored by whisper_coreml_init(),
    // released by whisper_coreml_free()
    const void * data;
};
|
||||
|
||||
// Load the compiled Core ML encoder model located at path_model.
// Returns a heap-allocated context, or NULL when path_model is NULL, is not
// valid UTF-8, or the model cannot be loaded.
// Release the result with whisper_coreml_free().
struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
    // -[NSString initWithUTF8String:] must not be handed a NULL C string
    if (path_model == NULL) {
        return NULL;
    }

    NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];

    // nil here means the bytes were not valid UTF-8; +fileURLWithPath: rejects nil
    if (path_model_str == nil) {
        return NULL;
    }

    NSURL * url_model = [NSURL fileURLWithPath: path_model_str];

    // the +1 retain taken here is balanced by CFRelease() in whisper_coreml_free()
    const void * data = CFBridgingRetain([[CoremlEncoder alloc] initWithContentsOfURL:url_model error:nil]);

    if (data == NULL) {
        return NULL;
    }

    whisper_coreml_context * ctx = new whisper_coreml_context;

    ctx->data = data;

    return ctx;
}
|
||||
|
||||
// Release a context created by whisper_coreml_init().
// Passing NULL is allowed and does nothing.
void whisper_coreml_free(struct whisper_coreml_context * ctx) {
    if (ctx == NULL) {
        return;
    }

    // CFRelease() must never be called with NULL
    if (ctx->data != NULL) {
        CFRelease(ctx->data);
    }

    delete ctx;
}
|
||||
|
||||
// Run the Core ML encoder on one mel segment.
//   ctx - context from whisper_coreml_init()
//   mel - input buffer, wrapped (not copied) as a 1 x 80 x 3000 float32 array
//   out - destination; receives every element of the model output
// If any argument is NULL or the prediction fails, the problem is logged and
// `out` is left untouched.
void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                               float * mel,
                               float * out) {
    if (ctx == NULL || ctx->data == NULL || mel == NULL || out == NULL) {
        NSLog(@"whisper_coreml_encode: invalid arguments");
        return;
    }

    NSError * error = nil;

    // deallocator is nil: the caller keeps ownership of `mel`
    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
                                           shape: @[@1, @80, @3000]
                                        dataType: MLMultiArrayDataTypeFloat32
                                         strides: @[@(240000), @(3000), @1]
                                     deallocator: nil
                                           error: &error
    ];

    if (inMultiArray == nil) {
        NSLog(@"whisper_coreml_encode: failed to wrap input: %@", error);
        return;
    }

    CoremlEncoderOutput * outCoreML = [(__bridge id) ctx->data predictionFromMelSegment:inMultiArray error:&error];

    MLMultiArray * outMA = outCoreML.output;

    if (outMA == nil) {
        NSLog(@"whisper_coreml_encode: prediction failed: %@", error);
        return;
    }

    // NOTE(review): assumes the model output holds 32-bit floats - confirm against the model
    memcpy(out, outMA.dataPointer, outMA.count * sizeof(float));
}
|
||||
|
||||
#if __cplusplus
|
||||
}
|
||||
#endif
|
82
models/download-coreml-model.sh
Executable file
82
models/download-coreml-model.sh
Executable file
@ -0,0 +1,82 @@
|
||||
#!/bin/bash
|
||||
|
||||
# This script downloads Whisper model files that have already been converted to Core ML format.
|
||||
# This way you don't have to convert them yourself.
|
||||
|
||||
src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
|
||||
pfx="resolve/main/ggml"
|
||||
|
||||
# get the path of this script
|
||||
# Print the directory that contains this script.
# Uses realpath when it is available and falls back to cd + pwd -P otherwise.
# All expansions are quoted so that paths containing spaces survive.
function get_script_path() {
    if [ -x "$(command -v realpath)" ]; then
        dirname "$(realpath "$0")"
    else
        local ret
        ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
        echo "$ret"
    fi
}
|
||||
|
||||
models_path="$(get_script_path)"
|
||||
|
||||
# Whisper models
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
|
||||
# list available models
|
||||
# Print the names in the global `models` array on a single line.
# The loop variable is local so the caller's `model` variable is not clobbered,
# and names are passed as printf arguments rather than used as the format string.
function list_models {
    local name

    printf "\n"
    printf " Available models:"
    for name in "${models[@]}"; do
        printf " %s" "$name"
    done
    printf "\n\n"
}
|
||||
|
||||
if [ "$#" -ne 1 ]; then
    printf "Usage: %s <model>\n" "$0"
    list_models

    exit 1
fi

model=$1

# accept only an exact match against the known model names
# (a substring test would also accept input such as "tiny.en tiny")
model_is_valid=0
for known_model in "${models[@]}"; do
    if [ "$known_model" = "$model" ]; then
        model_is_valid=1
        break
    fi
done

if [ "$model_is_valid" -ne 1 ]; then
    printf "Invalid model: %s\n" "$model"
    list_models

    exit 1
fi

# download Core ML model

printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"

# never download into an unexpected directory
if ! cd "$models_path"; then
    printf "Failed to enter directory '%s'\n" "$models_path"
    exit 1
fi

if [ -f "ggml-$model.mlmodel" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

if [ -x "$(command -v wget)" ]; then
    wget --quiet --show-progress -O "ggml-$model.mlmodel" "$src/$pfx-$model.mlmodel"
    download_status=$?
elif [ -x "$(command -v curl)" ]; then
    # --fail: report HTTP errors via the exit status instead of saving the error page
    curl -L --fail --output "ggml-$model.mlmodel" "$src/$pfx-$model.mlmodel"
    download_status=$?
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi

if [ "$download_status" -ne 0 ]; then
    # drop the partial file, otherwise the next run would report "already exists"
    rm -f "ggml-$model.mlmodel"
    printf "Failed to download Core ML model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
printf "Run the following command to compile it:\n\n"
printf " $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
printf "You can now use it like this:\n\n"
printf " $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
printf "\n"
|
49
whisper.cpp
49
whisper.cpp
@ -1,5 +1,8 @@
|
||||
#define WHISPER_BUILD
|
||||
#include "whisper.h"
|
||||
// Core ML wrapper API; tested with #ifdef so that defining WHISPER_USE_COREML
// without a value (e.g. a bare #define) enables it as well
#ifdef WHISPER_USE_COREML
#include "coreml/whisper-encoder.h"
#endif
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
@ -594,6 +597,11 @@ struct whisper_context {
|
||||
|
||||
int lang_id = 0; // english by default
|
||||
|
||||
std::string path_model; // populated by whisper_init_from_file()
|
||||
#ifdef WHISPER_USE_COREML
    // Core ML encoder state, assigned by whisper_init_from_file();
    // starts out null so a context that never loaded a Core ML model
    // does not carry an indeterminate pointer
    whisper_coreml_context * ctx_coreml = nullptr;
#endif
|
||||
|
||||
// [EXPERIMENTAL] token-level timestamps data
|
||||
int64_t t_beg = 0;
|
||||
int64_t t_last = 0;
|
||||
@ -1696,6 +1704,9 @@ static bool whisper_encode(
|
||||
wctx.use_buf(ctx0, -1);
|
||||
|
||||
// run the computation
|
||||
#ifdef WHISPER_USE_COREML
|
||||
whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data);
|
||||
#else
|
||||
{
|
||||
struct ggml_cgraph gf = {};
|
||||
gf.n_threads = n_threads;
|
||||
@ -1705,6 +1716,7 @@ static bool whisper_encode(
|
||||
|
||||
//ggml_graph_print(&gf);
|
||||
}
|
||||
#endif
|
||||
|
||||
// cur
|
||||
//{
|
||||
@ -2507,6 +2519,20 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
|
||||
// interface implementation
|
||||
//
|
||||
|
||||
#ifdef WHISPER_USE_COREML
|
||||
// Derive the path of the compiled Core ML model from the path of the ggml model:
// the file extension (e.g. ".bin") is replaced with ".mlmodelc".
// Only a '.' located in the last path component is treated as the start of an
// extension, so dots in directory names ("./models/ggml-base", "/data/v1.2/model")
// are left intact. A path without an extension simply gets ".mlmodelc" appended.
static std::string whisper_get_coreml_path(std::string path_bin) {
    const auto pos_sep = path_bin.rfind('/');
    const auto pos_dot = path_bin.rfind('.');

    const bool has_ext =
        pos_dot != std::string::npos &&
        (pos_sep == std::string::npos || pos_dot > pos_sep);

    if (has_ext) {
        path_bin.erase(pos_dot);
    }

    path_bin += ".mlmodelc";

    return path_bin;
}
|
||||
#endif
|
||||
|
||||
struct whisper_context * whisper_init_from_file(const char * path_model) {
|
||||
whisper_model_loader loader = {};
|
||||
|
||||
@ -2519,6 +2545,7 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
|
||||
}
|
||||
|
||||
loader.context = &fin;
|
||||
|
||||
loader.read = [](void * ctx, void * output, size_t read_size) {
|
||||
std::ifstream * fin = (std::ifstream*)ctx;
|
||||
fin->read((char *)output, read_size);
|
||||
@ -2535,7 +2562,23 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
|
||||
fin->close();
|
||||
};
|
||||
|
||||
return whisper_init(&loader);
|
||||
auto ctx = whisper_init(&loader);
|
||||
|
||||
if (ctx) {
|
||||
ctx->path_model = path_model;
|
||||
#ifdef WHISPER_USE_COREML
|
||||
const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
|
||||
fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
|
||||
|
||||
ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
|
||||
if (!ctx->ctx_coreml) {
|
||||
fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
|
||||
@ -2607,6 +2650,10 @@ void whisper_free(struct whisper_context * ctx) {
|
||||
ggml_free(ctx->decoders[i].kv_self.ctx);
|
||||
}
|
||||
}
|
||||
#ifdef WHISPER_USE_COREML
        // release the Core ML encoder only when one was actually loaded
        if (ctx->ctx_coreml != nullptr) {
            whisper_coreml_free(ctx->ctx_coreml);
            ctx->ctx_coreml = nullptr;
        }
#endif
|
||||
delete ctx;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user