*.o
*.a
.cache/
+.coreml/
.test/
.vs/
.vscode/
extra/bench-gg.txt
-*.mlmodel*
+models/*.mlmodel
+models/*.mlmodelc
+models/*.mlpackage
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
+
+ option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
else()
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
endif()
find_package(Threads REQUIRED)
-# on APPLE - include Accelerate framework
-if (APPLE AND NOT WHISPER_NO_ACCELERATE)
- find_library(ACCELERATE_FRAMEWORK Accelerate)
- if (ACCELERATE_FRAMEWORK)
- message(STATUS "Accelerate framework found")
+# on APPLE
+if (APPLE)
+ # include Accelerate framework
+ if (NOT WHISPER_NO_ACCELERATE)
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
+
+ if (ACCELERATE_FRAMEWORK)
+ message(STATUS "Accelerate framework found")
- set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
- set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
- else()
- message(WARNING "Accelerate framework not found")
+ set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
+ set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
+ else()
+ message(WARNING "Accelerate framework not found")
+ endif()
+ endif()
+
+ if (WHISPER_COREML)
+ find_library(FOUNDATION_FRAMEWORK Foundation)
+ find_library(COREML_FRAMEWORK CoreML)
+
+ if (COREML_FRAMEWORK)
+ message(STATUS "CoreML framework found")
+
+ set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
+ else()
+ message(WARNING "CoreML framework not found")
+ endif()
endif()
endif()
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
endif()
+#
+# whisper.coreml - Core ML support
+#
+
+if (WHISPER_COREML)
+ set(TARGET whisper.coreml)
+
+ add_library(${TARGET}
+ coreml/whisper-encoder.h
+ coreml/whisper-encoder.mm
+ coreml/whisper-encoder-impl.h
+ coreml/whisper-encoder-impl.m
+ )
+
+ include(DefaultTargetOptions)
+
+ target_include_directories(${TARGET} PUBLIC
+ .
+ )
+
+ target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
+
+ set_target_properties(${TARGET} PROPERTIES
+ COMPILE_FLAGS "-fobjc-arc"
+ )
+endif()
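+
+# Example configure step (illustrative):
+#   cmake -B build -DWHISPER_COREML=1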
+
#
# whisper - this is the main library of the project
#
.
)
+if (WHISPER_COREML)
+ target_link_libraries(${TARGET} PRIVATE whisper.coreml)
+endif()
+
if (MSVC)
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
LDFLAGS += -framework Accelerate
endif
endif
+ifdef WHISPER_COREML
+ CXXFLAGS += -DWHISPER_USE_COREML
+ LDFLAGS += -framework Foundation -framework CoreML
+endif
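+
+# Example (illustrative): enable Core ML support with `WHISPER_COREML=1 make -j`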
ifdef WHISPER_OPENBLAS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas
whisper.o: whisper.cpp whisper.h ggml.h
$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
-libwhisper.a: ggml.o whisper.o
- $(AR) rcs libwhisper.a ggml.o whisper.o
+ifndef WHISPER_COREML
+WHISPER_OBJ = whisper.o
+else
+whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h
+ $(CXX) -O3 -I . -c coreml/whisper-encoder.mm -o whisper-encoder.o
+
+whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
+ $(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
+
+WHISPER_OBJ = whisper.o whisper-encoder.o whisper-encoder-impl.o
+endif
+
+libwhisper.a: ggml.o $(WHISPER_OBJ)
+ $(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
-libwhisper.so: ggml.o whisper.o
- $(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
+libwhisper.so: ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
clean:
rm -f *.o main stream command talk talk-llama bench libwhisper.a libwhisper.so
SRC_COMMON = examples/common.cpp
SRC_COMMON_SDL = examples/common-sdl.cpp
-main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
- $(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
+main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
./main -h
-bench: examples/bench/bench.cpp ggml.o whisper.o
- $(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
-stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
- $(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
- $(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
+command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
-talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
- $(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
-talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
- $(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk-llama $(CC_SDL) $(LDFLAGS)
+talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+ $(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
#
# Audio samples
--- /dev/null
+//
+// whisper-decoder-impl.h
+//
+// This file was automatically generated and should not be edited.
+//
+
+#import <Foundation/Foundation.h>
+#import <CoreML/CoreML.h>
+#include <stdint.h>
+#include <os/log.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+
+/// Model Prediction Input Type
+API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
+@interface whisper_decoder_implInput : NSObject<MLFeatureProvider>
+
+/// token_data as 1 by 1 matrix of 32-bit integers
+@property (readwrite, nonatomic, strong) MLMultiArray * token_data;
+
+/// audio_data as 1 × 384 × 1 × 1500 4-dimensional array of floats
+@property (readwrite, nonatomic, strong) MLMultiArray * audio_data;
+- (instancetype)init NS_UNAVAILABLE;
+- (instancetype)initWithToken_data:(MLMultiArray *)token_data audio_data:(MLMultiArray *)audio_data NS_DESIGNATED_INITIALIZER;
+
+@end
+
+
+/// Model Prediction Output Type
+API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
+@interface whisper_decoder_implOutput : NSObject<MLFeatureProvider>
+
+/// var_1346 as multidimensional array of floats
+@property (readwrite, nonatomic, strong) MLMultiArray * var_1346;
+- (instancetype)init NS_UNAVAILABLE;
+- (instancetype)initWithVar_1346:(MLMultiArray *)var_1346 NS_DESIGNATED_INITIALIZER;
+
+@end
+
+
+/// Class for model loading and prediction
+API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
+@interface whisper_decoder_impl : NSObject
+@property (readonly, nonatomic, nullable) MLModel * model;
+
+/**
+ URL of the underlying .mlmodelc directory.
+*/
++ (nullable NSURL *)URLOfModelInThisBundle;
+
+/**
+ Initialize whisper_decoder_impl instance from an existing MLModel object.
+
+ Usually the application does not use this initializer unless it makes a subclass of whisper_decoder_impl.
+ Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
+*/
+- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
+
+/**
+ Initialize whisper_decoder_impl instance with the model in this bundle.
+*/
+- (nullable instancetype)init;
+
+/**
+ Initialize whisper_decoder_impl instance with the model in this bundle.
+
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Initialize whisper_decoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_decoder_impl.
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Initialize whisper_decoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_decoder_impl.
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Construct whisper_decoder_impl instance asynchronously with configuration.
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
+*/
++ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler;
+
+/**
+ Construct whisper_decoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
+
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param modelURL The model URL.
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
+*/
++ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler;
+
+/**
+ Make a prediction using the standard interface
+ @param input an instance of whisper_decoder_implInput to predict from
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the prediction as whisper_decoder_implOutput
+*/
+- (nullable whisper_decoder_implOutput *)predictionFromFeatures:(whisper_decoder_implInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Make a prediction using the standard interface
+ @param input an instance of whisper_decoder_implInput to predict from
+ @param options prediction options
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the prediction as whisper_decoder_implOutput
+*/
+- (nullable whisper_decoder_implOutput *)predictionFromFeatures:(whisper_decoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Make a prediction using the convenience interface
+ @param token_data as 1 by 1 matrix of 32-bit integers:
+ @param audio_data as 1 × 384 × 1 × 1500 4-dimensional array of floats:
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the prediction as whisper_decoder_implOutput
+*/
+- (nullable whisper_decoder_implOutput *)predictionFromToken_data:(MLMultiArray *)token_data audio_data:(MLMultiArray *)audio_data error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Batch prediction
+ @param inputArray array of whisper_decoder_implInput instances to obtain predictions from
+ @param options prediction options
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the predictions as NSArray<whisper_decoder_implOutput *>
+*/
+- (nullable NSArray<whisper_decoder_implOutput *> *)predictionsFromInputs:(NSArray<whisper_decoder_implInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+@end
+
+NS_ASSUME_NONNULL_END
--- /dev/null
+//
+// whisper-decoder-impl.m
+//
+// This file was automatically generated and should not be edited.
+//
+
+#if !__has_feature(objc_arc)
+#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
+#endif
+
+#import "whisper-decoder-impl.h"
+
+@implementation whisper_decoder_implInput
+
+- (instancetype)initWithToken_data:(MLMultiArray *)token_data audio_data:(MLMultiArray *)audio_data {
+ self = [super init];
+ if (self) {
+ _token_data = token_data;
+ _audio_data = audio_data;
+ }
+ return self;
+}
+
+- (NSSet<NSString *> *)featureNames {
+ return [NSSet setWithArray:@[@"token_data", @"audio_data"]];
+}
+
+- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
+ if ([featureName isEqualToString:@"token_data"]) {
+ return [MLFeatureValue featureValueWithMultiArray:self.token_data];
+ }
+ if ([featureName isEqualToString:@"audio_data"]) {
+ return [MLFeatureValue featureValueWithMultiArray:self.audio_data];
+ }
+ return nil;
+}
+
+@end
+
+@implementation whisper_decoder_implOutput
+
+- (instancetype)initWithVar_1346:(MLMultiArray *)var_1346 {
+ self = [super init];
+ if (self) {
+ _var_1346 = var_1346;
+ }
+ return self;
+}
+
+- (NSSet<NSString *> *)featureNames {
+ return [NSSet setWithArray:@[@"var_1346"]];
+}
+
+- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
+ if ([featureName isEqualToString:@"var_1346"]) {
+ return [MLFeatureValue featureValueWithMultiArray:self.var_1346];
+ }
+ return nil;
+}
+
+@end
+
+@implementation whisper_decoder_impl
+
+
+/**
+ URL of the underlying .mlmodelc directory.
+*/
++ (nullable NSURL *)URLOfModelInThisBundle {
+ NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"whisper_decoder_impl" ofType:@"mlmodelc"];
+ if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load whisper-decoder-impl.mlmodelc in the bundle resource"); return nil; }
+ return [NSURL fileURLWithPath:assetPath];
+}
+
+
+/**
+ Initialize whisper_decoder_impl instance from an existing MLModel object.
+
+ Usually the application does not use this initializer unless it makes a subclass of whisper_decoder_impl.
+ Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
+*/
+- (instancetype)initWithMLModel:(MLModel *)model {
+ self = [super init];
+ if (!self) { return nil; }
+ _model = model;
+ if (_model == nil) { return nil; }
+ return self;
+}
+
+
+/**
+ Initialize whisper_decoder_impl instance with the model in this bundle.
+*/
+- (nullable instancetype)init {
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
+}
+
+
+/**
+ Initialize whisper_decoder_impl instance with the model in this bundle.
+
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
+}
+
+
+/**
+ Initialize whisper_decoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_decoder_impl.
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
+ if (model == nil) { return nil; }
+ return [self initWithMLModel:model];
+}
+
+
+/**
+ Initialize whisper_decoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_decoder_impl.
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
+ if (model == nil) { return nil; }
+ return [self initWithMLModel:model];
+}
+
+
+/**
+ Construct whisper_decoder_impl instance asynchronously with configuration.
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
+*/
++ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler {
+ [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
+ configuration:configuration
+ completionHandler:handler];
+}
+
+
+/**
+ Construct whisper_decoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
+
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param modelURL The model URL.
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
+*/
++ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler {
+ [MLModel loadContentsOfURL:modelURL
+ configuration:configuration
+ completionHandler:^(MLModel *model, NSError *error) {
+ if (model != nil) {
+ whisper_decoder_impl *typedModel = [[whisper_decoder_impl alloc] initWithMLModel:model];
+ handler(typedModel, nil);
+ } else {
+ handler(nil, error);
+ }
+ }];
+}
+
+- (nullable whisper_decoder_implOutput *)predictionFromFeatures:(whisper_decoder_implInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
+}
+
+- (nullable whisper_decoder_implOutput *)predictionFromFeatures:(whisper_decoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
+ if (!outFeatures) { return nil; }
+ return [[whisper_decoder_implOutput alloc] initWithVar_1346:(MLMultiArray *)[outFeatures featureValueForName:@"var_1346"].multiArrayValue];
+}
+
+- (nullable whisper_decoder_implOutput *)predictionFromToken_data:(MLMultiArray *)token_data audio_data:(MLMultiArray *)audio_data error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ whisper_decoder_implInput *input_ = [[whisper_decoder_implInput alloc] initWithToken_data:token_data audio_data:audio_data];
+ return [self predictionFromFeatures:input_ error:error];
+}
+
+- (nullable NSArray<whisper_decoder_implOutput *> *)predictionsFromInputs:(NSArray<whisper_decoder_implInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
+ id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
+ if (!outBatch) { return nil; }
+ NSMutableArray<whisper_decoder_implOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
+ for (NSInteger i = 0; i < outBatch.count; i++) {
+ id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
+ whisper_decoder_implOutput * result = [[whisper_decoder_implOutput alloc] initWithVar_1346:(MLMultiArray *)[resultProvider featureValueForName:@"var_1346"].multiArrayValue];
+ [results addObject:result];
+ }
+ return results;
+}
+
+@end
--- /dev/null
+//
+// whisper-encoder-impl.h
+//
+// This file was automatically generated and should not be edited.
+//
+
+#import <Foundation/Foundation.h>
+#import <CoreML/CoreML.h>
+#include <stdint.h>
+#include <os/log.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+
+/// Model Prediction Input Type
+API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
+@interface whisper_encoder_implInput : NSObject<MLFeatureProvider>
+
+/// logmel_data as 1 × 80 × 3000 3-dimensional array of floats
+@property (readwrite, nonatomic, strong) MLMultiArray * logmel_data;
+- (instancetype)init NS_UNAVAILABLE;
+- (instancetype)initWithLogmel_data:(MLMultiArray *)logmel_data NS_DESIGNATED_INITIALIZER;
+
+@end
+
+
+/// Model Prediction Output Type
+API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
+@interface whisper_encoder_implOutput : NSObject<MLFeatureProvider>
+
+/// output as multidimensional array of floats
+@property (readwrite, nonatomic, strong) MLMultiArray * output;
+- (instancetype)init NS_UNAVAILABLE;
+- (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;
+
+@end
+
+
+/// Class for model loading and prediction
+API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
+@interface whisper_encoder_impl : NSObject
+@property (readonly, nonatomic, nullable) MLModel * model;
+
+/**
+ URL of the underlying .mlmodelc directory.
+*/
++ (nullable NSURL *)URLOfModelInThisBundle;
+
+/**
+ Initialize whisper_encoder_impl instance from an existing MLModel object.
+
+ Usually the application does not use this initializer unless it makes a subclass of whisper_encoder_impl.
+ Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
+*/
+- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
+
+/**
+ Initialize whisper_encoder_impl instance with the model in this bundle.
+*/
+- (nullable instancetype)init;
+
+/**
+ Initialize whisper_encoder_impl instance with the model in this bundle.
+
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Initialize whisper_encoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Initialize whisper_encoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Construct whisper_encoder_impl instance asynchronously with configuration.
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
+*/
++ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler;
+
+/**
+ Construct whisper_encoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
+
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param modelURL The model URL.
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
+*/
++ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler;
+
+/**
+ Make a prediction using the standard interface
+ @param input an instance of whisper_encoder_implInput to predict from
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the prediction as whisper_encoder_implOutput
+*/
+- (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Make a prediction using the standard interface
+ @param input an instance of whisper_encoder_implInput to predict from
+ @param options prediction options
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the prediction as whisper_encoder_implOutput
+*/
+- (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Make a prediction using the convenience interface
+ @param logmel_data as 1 × 80 × 3000 3-dimensional array of floats:
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the prediction as whisper_encoder_implOutput
+*/
+- (nullable whisper_encoder_implOutput *)predictionFromLogmel_data:(MLMultiArray *)logmel_data error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+ Batch prediction
+ @param inputArray array of whisper_encoder_implInput instances to obtain predictions from
+ @param options prediction options
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+ @return the predictions as NSArray<whisper_encoder_implOutput *>
+*/
+- (nullable NSArray<whisper_encoder_implOutput *> *)predictionsFromInputs:(NSArray<whisper_encoder_implInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+@end
+
+NS_ASSUME_NONNULL_END
--- /dev/null
+//
+// whisper-encoder-impl.m
+//
+// This file was automatically generated and should not be edited.
+//
+
+#if !__has_feature(objc_arc)
+#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
+#endif
+
+#import "whisper-encoder-impl.h"
+
+@implementation whisper_encoder_implInput
+
+- (instancetype)initWithLogmel_data:(MLMultiArray *)logmel_data {
+ self = [super init];
+ if (self) {
+ _logmel_data = logmel_data;
+ }
+ return self;
+}
+
+- (NSSet<NSString *> *)featureNames {
+ return [NSSet setWithArray:@[@"logmel_data"]];
+}
+
+- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
+ if ([featureName isEqualToString:@"logmel_data"]) {
+ return [MLFeatureValue featureValueWithMultiArray:self.logmel_data];
+ }
+ return nil;
+}
+
+@end
+
+@implementation whisper_encoder_implOutput
+
+- (instancetype)initWithOutput:(MLMultiArray *)output {
+ self = [super init];
+ if (self) {
+ _output = output;
+ }
+ return self;
+}
+
+- (NSSet<NSString *> *)featureNames {
+ return [NSSet setWithArray:@[@"output"]];
+}
+
+- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
+ if ([featureName isEqualToString:@"output"]) {
+ return [MLFeatureValue featureValueWithMultiArray:self.output];
+ }
+ return nil;
+}
+
+@end
+
+@implementation whisper_encoder_impl
+
+
+/**
+ URL of the underlying .mlmodelc directory.
+*/
++ (nullable NSURL *)URLOfModelInThisBundle {
+ NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"whisper_encoder_impl" ofType:@"mlmodelc"];
+ if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load whisper-encoder-impl.mlmodelc in the bundle resource"); return nil; }
+ return [NSURL fileURLWithPath:assetPath];
+}
+
+
+/**
+ Initialize whisper_encoder_impl instance from an existing MLModel object.
+
+ Usually the application does not use this initializer unless it makes a subclass of whisper_encoder_impl.
+ Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
+*/
+- (instancetype)initWithMLModel:(MLModel *)model {
+ self = [super init];
+ if (!self) { return nil; }
+ _model = model;
+ if (_model == nil) { return nil; }
+ return self;
+}
+
+
+/**
+ Initialize whisper_encoder_impl instance with the model in this bundle.
+*/
+- (nullable instancetype)init {
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
+}
+
+
+/**
+ Initialize whisper_encoder_impl instance with the model in this bundle.
+
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
+}
+
+
+/**
+ Initialize whisper_encoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
+ if (model == nil) { return nil; }
+ return [self initWithMLModel:model];
+}
+
+
+/**
+ Initialize whisper_encoder_impl instance from the model URL.
+
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
+ @param configuration The model configuration object
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
+ if (model == nil) { return nil; }
+ return [self initWithMLModel:model];
+}
+
+
+/**
+ Construct whisper_encoder_impl instance asynchronously with configuration.
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
+*/
++ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler {
+ [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
+ configuration:configuration
+ completionHandler:handler];
+}
+
+
+/**
+ Construct whisper_encoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
+
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+ @param modelURL The model URL.
+ @param configuration The model configuration
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
+*/
++ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler {
+ [MLModel loadContentsOfURL:modelURL
+ configuration:configuration
+ completionHandler:^(MLModel *model, NSError *error) {
+ if (model != nil) {
+ whisper_encoder_impl *typedModel = [[whisper_encoder_impl alloc] initWithMLModel:model];
+ handler(typedModel, nil);
+ } else {
+ handler(nil, error);
+ }
+ }];
+}
+
+- (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
+}
+
+- (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
+ if (!outFeatures) { return nil; }
+ return [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
+}
+
+- (nullable whisper_encoder_implOutput *)predictionFromLogmel_data:(MLMultiArray *)logmel_data error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ whisper_encoder_implInput *input_ = [[whisper_encoder_implInput alloc] initWithLogmel_data:logmel_data];
+ return [self predictionFromFeatures:input_ error:error];
+}
+
+- (nullable NSArray<whisper_encoder_implOutput *> *)predictionsFromInputs:(NSArray<whisper_encoder_implInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+ id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
+ id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
+ if (!outBatch) { return nil; }
+ NSMutableArray<whisper_encoder_implOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
+ for (NSInteger i = 0; i < outBatch.count; i++) {
+ id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
+ whisper_encoder_implOutput * result = [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
+ [results addObject:result];
+ }
+ return results;
+}
+
+@end
--- /dev/null
+// Wrapper of the Core ML Whisper Encoder model
+//
+// Code is derived from the work of GitHub user @wangchou
+// ref: https://github.com/wangchou/callCoreMLFromCpp
+
+#if __cplusplus
+extern "C" {
+#endif
+
+struct whisper_coreml_context;
+
+struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
+void whisper_coreml_free(struct whisper_coreml_context * ctx);
+
+void whisper_coreml_encode(
+        const struct whisper_coreml_context * ctx,
+              float * mel,
+              float * out);
+
+#if __cplusplus
+}
+#endif
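+
+// Usage sketch (illustrative, not part of this header): the intended call
+// sequence from the C++ side is init -> encode -> free, e.g.
+//
+//   struct whisper_coreml_context * cml = whisper_coreml_init("models/ggml-base.en-encoder.mlmodelc");
+//   if (cml) {
+//       whisper_coreml_encode(cml, mel, out); // mel: 1 x 80 x 3000 floats
+//       whisper_coreml_free(cml);
+//   }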
--- /dev/null
+#import "coreml/whisper-encoder.h"
+#import "coreml/whisper-encoder-impl.h"
+
+#import <CoreML/CoreML.h>
+
+#include <stdlib.h>
+
+#if __cplusplus
+extern "C" {
+#endif
+
+struct whisper_coreml_context {
+ const void * data;
+};
+
+struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
+ NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];
+
+ NSURL * url_model = [NSURL fileURLWithPath: path_model_str];
+
+ const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model error:nil]);
+
+ if (data == NULL) {
+ return NULL;
+ }
+
+ whisper_coreml_context * ctx = new whisper_coreml_context;
+
+ ctx->data = data;
+
+ return ctx;
+}
+
+void whisper_coreml_free(struct whisper_coreml_context * ctx) {
+ CFRelease(ctx->data);
+ delete ctx;
+}
+
+void whisper_coreml_encode(
+ const whisper_coreml_context * ctx,
+ float * mel,
+ float * out) {
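+    // wrap the caller's mel buffer (1 x 80 x 3000 floats, C-contiguous) without copying;
+    // strides are in elements: 80*3000 = 240000, 3000 and 1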
+ MLMultiArray * inMultiArray = [
+ [MLMultiArray alloc] initWithDataPointer: mel
+ shape: @[@1, @80, @3000]
+ dataType: MLMultiArrayDataTypeFloat32
+ strides: @[@(240000), @(3000), @1]
+ deallocator: nil
+ error: nil
+ ];
+
+ whisper_encoder_implOutput * outCoreML = [(__bridge id) ctx->data predictionFromLogmel_data:inMultiArray error:nil];
+
+ MLMultiArray * outMA = outCoreML.output;
+
+ //NSArray<NSNumber *> * shape = outMA.shape;
+ //NSArray<NSNumber *> * strides = outMA.strides;
+
+ //printf("shape: %ld %ld %ld %ld\n", [shape[0] longValue], [shape[1] longValue], [shape[2] longValue], [shape[3] longValue]);
+ //printf("strides: %ld %ld %ld %ld\n", [strides[0] longValue], [strides[1] longValue], [strides[2] longValue], [strides[3] longValue]);
+
+ memcpy(out, outMA.dataPointer, outMA.count * sizeof(float));
+}
+
+#if __cplusplus
+}
+#endif
config="$config BLAS"
fi
+ if [[ $system_info == *"COREML = 1"* ]]; then
+ config="$config COREML"
+ fi
+
commit=$(git rev-parse --short HEAD)
printf "| <todo> | <todo> | $config | $model | $n_threads | $load_time | $encode_time | $commit |\n"
--- /dev/null
+import argparse
+import torch
+import torch.nn.functional as F
+import coremltools as ct
+
+from torch import Tensor
+from torch import nn
+from typing import Dict
+from typing import Optional
+from ane_transformers.reference.layer_norm import LayerNormANE as LayerNormANEBase
+from coremltools.models.neural_network.quantization_utils import quantize_weights
+from whisper.model import Whisper, AudioEncoder, TextDecoder, ResidualAttentionBlock, MultiHeadAttention, ModelDimensions
+from whisper import load_model
+
+# Pre-hook that remaps nn.Linear weights to nn.Conv2d weights when loading the state dict
+def linear_to_conv2d_map(state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs):
+ """
+ Unsqueeze twice to map nn.Linear weights to nn.Conv2d weights
+ """
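+    # e.g. an attention (n_state, n_state) Linear weight becomes an
+    # (n_state, n_state, 1, 1) kernel for the equivalent 1x1 Conv2d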
+ for k in state_dict:
+ is_attention = all(substr in k for substr in ['attn', '.weight'])
+ is_mlp = any([k.endswith(s) for s in ['mlp.0.weight', 'mlp.2.weight']])
+
+ if (is_attention or is_mlp) and len(state_dict[k].shape) == 2:
+ state_dict[k] = state_dict[k][:, :, None, None]
+
+
+def correct_for_bias_scale_order_inversion(state_dict, prefix, local_metadata,
+ strict, missing_keys,
+ unexpected_keys, error_msgs):
+ state_dict[prefix + 'bias'] = state_dict[prefix + 'bias'] / state_dict[prefix + 'weight']
+ return state_dict
+
+class LayerNormANE(LayerNormANEBase):
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._register_load_state_dict_pre_hook(
+ correct_for_bias_scale_order_inversion)
+
+class MultiHeadAttentionANE(MultiHeadAttention):
+ def __init__(self, n_state: int, n_head: int):
+ super().__init__(n_state, n_head)
+
+ setattr(self, 'query', nn.Conv2d(n_state, n_state, kernel_size=1))
+ setattr(self, 'key', nn.Conv2d(n_state, n_state, kernel_size=1, bias=False))
+ setattr(self, 'value', nn.Conv2d(n_state, n_state, kernel_size=1))
+ setattr(self, 'out', nn.Conv2d(n_state, n_state, kernel_size=1))
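+        # 1x1 Conv2d over (batch, n_state, 1, seq) tensors is the ANE-friendly
+        # replacement for the original nn.Linear projections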
+
+ def forward(self,
+ x: Tensor,
+ xa: Optional[Tensor] = None,
+ mask: Optional[Tensor] = None,
+ kv_cache: Optional[dict] = None):
+
+ q = self.query(x)
+
+ if kv_cache is None or xa is None or self.key not in kv_cache:
+ # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
+ # otherwise, perform key/value projections for self- or cross-attention as usual.
+ k = self.key(x if xa is None else xa)
+ v = self.value(x if xa is None else xa)
+
+ else:
+ # for cross-attention, calculate keys and values once and reuse in subsequent calls.
+ k = kv_cache[self.key]
+ v = kv_cache[self.value]
+
+ wv, qk = self.qkv_attention_ane(q, k, v, mask)
+
+ return self.out(wv), qk
+
+ def qkv_attention_ane(self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None):
+
+ _, dim, _, seqlen = q.size()
+
+ dim_per_head = dim // self.n_head
+
+ scale = float(dim_per_head)**-0.5
+
+ q = q * scale
+
+ mh_q = q.split(dim_per_head, dim=1)
+ mh_k = k.transpose(1,3).split(dim_per_head, dim=3)
+ mh_v = v.split(dim_per_head, dim=1)
+
+ mh_qk = [
+ torch.einsum('bchq,bkhc->bkhq', [qi, ki])
+ for qi, ki in zip(mh_q, mh_k)
+ ] # (batch_size, max_seq_length, 1, max_seq_length) * n_heads
+
+ if mask is not None:
+ for head_idx in range(self.n_head):
+ mh_qk[head_idx] = mh_qk[head_idx] + mask[:, :seqlen, :, :seqlen]
+
+ attn_weights = [aw.softmax(dim=1) for aw in mh_qk] # (batch_size, max_seq_length, 1, max_seq_length) * n_heads
+ attn = [torch.einsum('bkhq,bchk->bchq', wi, vi) for wi, vi in zip(attn_weights, mh_v)] # (batch_size, dim_per_head, 1, max_seq_length) * n_heads
+ attn = torch.cat(attn, dim=1) # (batch_size, dim, 1, max_seq_length)
+
+ return attn, torch.cat(mh_qk, dim=1).float().detach()
+
+
+class ResidualAttentionBlockANE(ResidualAttentionBlock):
+ def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
+ super().__init__(n_state, n_head, cross_attention)
+
+ setattr(self, 'attn', MultiHeadAttentionANE(n_state, n_head))
+ setattr(self, 'attn_ln', LayerNormANE(n_state))
+
+ setattr(self, 'cross_attn', MultiHeadAttentionANE(n_state, n_head) if cross_attention else None)
+ setattr(self, 'cross_attn_ln', LayerNormANE(n_state) if cross_attention else None)
+
+ n_mlp = n_state * 4
+ setattr(self, 'mlp', nn.Sequential(
+ nn.Conv2d(n_state, n_mlp, kernel_size=1),
+ nn.GELU(),
+ nn.Conv2d(n_mlp, n_state, kernel_size=1)
+ ))
+ setattr(self, 'mlp_ln', LayerNormANE(n_state))
+
+
+class AudioEncoderANE(AudioEncoder):
+ def __init__(self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
+ super().__init__(n_mels, n_ctx, n_state, n_head, n_layer)
+
+ setattr(self, 'blocks', nn.ModuleList(
+ [ResidualAttentionBlockANE(n_state, n_head) for _ in range(n_layer)]
+ ))
+ setattr(self, 'ln_post', LayerNormANE(n_state))
+
+ def forward(self, x: Tensor):
+ """
+ x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
+ the mel spectrogram of the audio
+ """
+ x = F.gelu(self.conv1(x))
+ x = F.gelu(self.conv2(x))
+
+ assert x.shape[1:] == self.positional_embedding.shape[::-1], "incorrect audio shape"
+
+ # Add positional embedding and add dummy dim for ANE
+ x = (x + self.positional_embedding.transpose(0,1)).to(x.dtype).unsqueeze(2)
+
+ for block in self.blocks:
+ x = block(x)
+
+ x = self.ln_post(x)
+
+        # TODO: transposing the result here (to match the whisper.cpp memory order) still
+        # yields wrong results - somewhat less wrong than without the transpose, but wrong.
+        # It is also unclear why the original OpenAI implementation does not need this.
+        #
+        # transpose to (batch_size, n_ctx, n_state); x: torch.Tensor, shape = (batch_size, n_state, 1, n_ctx)
+        # x = x.transpose(1,3)
+
+ return x
+
+class TextDecoderANE(TextDecoder):
+
+ def __init__(self, n_vocab: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
+ super().__init__(n_vocab, n_ctx, n_state, n_head, n_layer)
+
+ setattr(self, 'blocks', nn.ModuleList(
+ [ResidualAttentionBlockANE(n_state, n_head, cross_attention=True) for _ in range(n_layer)]
+ ))
+ setattr(self, 'ln', LayerNormANE(n_state))
+
+ def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
+ """
+ x : torch.LongTensor, shape = (batch_size, <= n_ctx)
+ the text tokens
+ xa : torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx)
+ the encoded audio features to be attended on
+ """
+ offset = next(iter(kv_cache.values())).shape[3] if kv_cache else 0
+ x = self.token_embedding(x) + self.positional_embedding[offset : offset + x.shape[-1]]
+ x = x.to(xa.dtype)
+
+ # Reformat for ANE
+ mask = self.mask[None, None, :, :].permute(0,3,1,2)
+ x = x.transpose(1,2).unsqueeze(2)
+
+ for block in self.blocks:
+ x = block(x, xa, mask=mask, kv_cache=kv_cache)
+
+ x = self.ln(x)
+
+ # Reformat back from ANE
+ x = x.permute(0,2,3,1).squeeze(0)
+
+ # ANE can only load tensors with dim size of at most 16,384 - whisper uses 51,864 (en) or 51,865 (multi-lang) tokens so we need to compute in chunks
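+        # each einsum below computes x @ split.T for one vocabulary slice; concatenated,
+        # the chunks reproduce the full x @ token_embedding.weight.T for the
+        # single-token shapes used during conversion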
+ if self.token_embedding.weight.shape[0] == 51865:
+ # split in 11 chunks - 4715 each
+ splits = self.token_embedding.weight.split(self.token_embedding.weight.shape[0]//11, dim=0)
+ logits = torch.cat([torch.einsum('bid,jd->bij', x, split) for split in splits]).view(*x.shape[:2], -1)
+ else:
+ # split in 12 chunks - 4322 each
+ assert(self.token_embedding.weight.shape[0] == 51864)
+ splits = self.token_embedding.weight.split(self.token_embedding.weight.shape[0]//12, dim=0)
+ logits = torch.cat([torch.einsum('bid,jd->bij', x, split) for split in splits]).view(*x.shape[:2], -1)
+
+ return logits
+
+class WhisperANE(Whisper):
+ def __init__(self, dims: ModelDimensions):
+ super().__init__(dims)
+
+ setattr(self, 'encoder', AudioEncoderANE(
+ self.dims.n_mels,
+ self.dims.n_audio_ctx,
+ self.dims.n_audio_state,
+ self.dims.n_audio_head,
+ self.dims.n_audio_layer,
+ ))
+ setattr(self, 'decoder', TextDecoderANE(
+ self.dims.n_vocab,
+ self.dims.n_text_ctx,
+ self.dims.n_text_state,
+ self.dims.n_text_head,
+ self.dims.n_text_layer,
+ ))
+
+ self._register_load_state_dict_pre_hook(linear_to_conv2d_map)
+
+    def forward(self, mel: torch.Tensor, tokens: torch.Tensor) -> torch.Tensor:
+ return self.decoder(tokens, self.encoder(mel))
+
+ def install_kv_cache_hooks(self, cache: Optional[dict] = None):
+ cache = {**cache} if cache is not None else {}
+ hooks = []
+
+ def save_to_cache(module, _, output):
+ if module not in cache or output.shape[3] > self.decoder.positional_embedding.shape[0]:
+ cache[module] = output # save as-is, for the first token or cross attention
+ else:
+ cache[module] = torch.cat([cache[module], output], dim=3).detach()
+ return cache[module]
+
+ def install_hooks(layer: nn.Module):
+ if isinstance(layer, MultiHeadAttentionANE):
+ hooks.append(layer.key.register_forward_hook(save_to_cache))
+ hooks.append(layer.value.register_forward_hook(save_to_cache))
+
+ self.decoder.apply(install_hooks)
+ return cache, hooks
+
+def convert_encoder(hparams, model, quantize=False):
+ model.eval()
+
+ input_shape = (1, 80, 3000)
+ input_data = torch.randn(input_shape)
+ traced_model = torch.jit.trace(model, input_data)
+
+ model = ct.convert(
+ traced_model,
+ convert_to=None if quantize else "mlprogram", # convert will fail if weights are quantized, not sure why
+ inputs=[ct.TensorType(name="logmel_data", shape=input_shape)],
+ outputs=[ct.TensorType(name="output")],
+ compute_units=ct.ComputeUnit.ALL
+ )
+
+ if quantize:
+ model = quantize_weights(model, nbits=16)
+
+ return model
+
+def convert_decoder(hparams, model, quantize=False):
+ model.eval()
+
+ tokens_shape = (1, 1)
+ audio_shape = (1, hparams.n_audio_state, 1, 1500)
+
+ audio_data = torch.randn(audio_shape)
+ token_data = torch.randint(50257, tokens_shape).long()
+ traced_model = torch.jit.trace(model, (token_data, audio_data))
+
+ model = ct.convert(
+ traced_model,
+ convert_to=None if quantize else "mlprogram", # convert will fail if weights are quantized, not sure why
+ inputs=[
+ ct.TensorType(name="token_data", shape=tokens_shape, dtype=int),
+ ct.TensorType(name="audio_data", shape=audio_shape)
+ ]
+ )
+
+ if quantize:
+ model = quantize_weights(model, nbits=16)
+
+ return model
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large)", required=True)
+ parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
+ parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
+ parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
+ args = parser.parse_args()
+
+ if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large"]:
+ raise ValueError("Invalid model name")
+
+ whisper = load_model(args.model).cpu()
+ hparams = whisper.dims
+ print(hparams)
+
+ if args.optimize_ane:
+ whisperANE = WhisperANE(hparams).eval()
+ whisperANE.load_state_dict(whisper.state_dict())
+
+ encoder = whisperANE.encoder
+ decoder = whisperANE.decoder
+ else:
+ encoder = whisper.encoder
+ decoder = whisper.decoder
+
+ # Convert encoder
+ encoder = convert_encoder(hparams, encoder, quantize=args.quantize)
+ encoder.save(f"models/coreml-encoder-{args.model}.mlpackage")
+
+ if args.encoder_only is False:
+ # Convert decoder
+ decoder = convert_decoder(hparams, decoder, quantize=args.quantize)
+ decoder.save(f"models/coreml-decoder-{args.model}.mlpackage")
+
+ print("done converting")
--- /dev/null
+#!/bin/bash
+
+# This script downloads Whisper model files that have already been converted to Core ML format.
+# This way you don't have to convert them yourself.
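+#
+# Example invocation (assuming the script lives at models/download-coreml-model.sh):
+#   ./models/download-coreml-model.sh base.en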
+
+src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
+pfx="resolve/main/ggml"
+
+# get the path of this script
+function get_script_path() {
+ if [ -x "$(command -v realpath)" ]; then
+ echo "$(dirname $(realpath $0))"
+ else
+ local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
+ echo "$ret"
+ fi
+}
+
+models_path="$(get_script_path)"
+
+# Whisper models
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+
+# list available models
+function list_models {
+ printf "\n"
+ printf " Available models:"
+ for model in "${models[@]}"; do
+ printf " $model"
+ done
+ printf "\n\n"
+}
+
+if [ "$#" -ne 1 ]; then
+ printf "Usage: $0 <model>\n"
+ list_models
+
+ exit 1
+fi
+
+model=$1
+
+if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
+ printf "Invalid model: $model\n"
+ list_models
+
+ exit 1
+fi
+
+# download Core ML model
+
+printf "Downloading Core ML model $model from '$src' ...\n"
+
+cd "$models_path"
+
+if [ -f "ggml-$model.mlmodel" ]; then
+ printf "Model $model already exists. Skipping download.\n"
+ exit 0
+fi
+
+if [ -x "$(command -v wget)" ]; then
+ wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+elif [ -x "$(command -v curl)" ]; then
+ curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+else
+ printf "Either wget or curl is required to download models.\n"
+ exit 1
+fi
+
+
+if [ $? -ne 0 ]; then
+ printf "Failed to download Core ML model $model \n"
+ printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
+ exit 1
+fi
+
+printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
+printf "Run the following command to compile it:\n\n"
+printf " $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
+printf "You can now use it like this:\n\n"
+printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
+printf "\n"
--- /dev/null
+#!/bin/bash
+#
+# This generates:
+# - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
+# - coreml/whisper-decoder-impl.h and coreml/whisper-decoder-impl.m
+#
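+#
+# Prerequisites (inferred from the convert script's imports): the Python packages
+# coremltools, openai-whisper and ane_transformers, plus Xcode's coremlc tool.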
+
+wd=$(dirname "$0")
+cd "$wd/../"
+
+python3 models/convert-whisper-to-coreml.py --model tiny.en
+
+mv -v models/coreml-encoder-tiny.en.mlpackage models/whisper-encoder-impl.mlpackage
+xcrun coremlc generate models/whisper-encoder-impl.mlpackage coreml/
+mv coreml/whisper_encoder_impl.h coreml/whisper-encoder-impl.h
+mv coreml/whisper_encoder_impl.m coreml/whisper-encoder-impl.m
+sed -i '' 's/whisper_encoder_impl\.h/whisper-encoder-impl.h/g' coreml/whisper-encoder-impl.m
+sed -i '' 's/whisper_encoder_impl\.m/whisper-encoder-impl.m/g' coreml/whisper-encoder-impl.m
+sed -i '' 's/whisper_encoder_impl\.h/whisper-encoder-impl.h/g' coreml/whisper-encoder-impl.h
+
+mv -v models/coreml-decoder-tiny.en.mlpackage models/whisper-decoder-impl.mlpackage
+xcrun coremlc generate models/whisper-decoder-impl.mlpackage coreml/
+mv coreml/whisper_decoder_impl.h coreml/whisper-decoder-impl.h
+mv coreml/whisper_decoder_impl.m coreml/whisper-decoder-impl.m
+sed -i '' 's/whisper_decoder_impl\.h/whisper-decoder-impl.h/g' coreml/whisper-decoder-impl.m
+sed -i '' 's/whisper_decoder_impl\.m/whisper-decoder-impl.m/g' coreml/whisper-decoder-impl.m
+sed -i '' 's/whisper_decoder_impl\.h/whisper-decoder-impl.h/g' coreml/whisper-decoder-impl.h
+
+rm -rfv models/whisper-encoder-impl.mlpackage models/whisper-decoder-impl.mlpackage
--- /dev/null
+#!/bin/bash
+
+# Usage: ./generate-coreml-model.sh <model-name>
+if [ $# -eq 0 ]
+ then
+ echo "No model name supplied"
+ echo "Usage: ./generate-coreml-model.sh <model-name>"
+ exit 1
+fi
+
+mname="$1"
+
+wd=$(dirname "$0")
+cd "$wd/../"
+
+python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True
+
+xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/
+rm -rf models/ggml-${mname}-encoder.mlmodelc
+mv -v models/coreml-encoder-${mname}.mlmodelc models/ggml-${mname}-encoder.mlmodelc
+
+# TODO: decoder (sometime in the future maybe)
+#xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
+#rm -rf models/ggml-${mname}-decoder.mlmodelc
+#mv -v models/coreml_decoder_${mname}.mlmodelc models/ggml-${mname}-decoder.mlmodelc
#define WHISPER_BUILD
#include "whisper.h"
+#ifdef WHISPER_USE_COREML
+#include "coreml/whisper-encoder.h"
+#endif
#include "ggml.h"
int lang_id = 0; // english by default
+ std::string path_model; // populated by whisper_init_from_file()
+#ifdef WHISPER_USE_COREML
+ whisper_coreml_context * ctx_coreml;
+#endif
+
// [EXPERIMENTAL] token-level timestamps data
int64_t t_beg = 0;
int64_t t_last = 0;
}
}
+#ifndef WHISPER_USE_COREML
struct ggml_tensor * cur;
// convolution + gelu
//ggml_graph_print(&gf);
}
+#else
+ wstate.use_buf(ctx0, -1);
+
+ struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
+
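+    // delegate the entire encoder to Core ML: it consumes the mel spectrogram and
+    // writes the encoder output directly into the data of `cur`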
+ whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
+#endif
// cur
//{
// interface implementation
//
+#ifdef WHISPER_USE_COREML
+// replace .bin with -encoder.mlmodelc
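+// e.g. "models/ggml-base.en.bin" -> "models/ggml-base.en-encoder.mlmodelc"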
+static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
+ auto pos = path_bin.rfind('.');
+ if (pos != std::string::npos) {
+ path_bin = path_bin.substr(0, pos);
+ }
+
+ path_bin += "-encoder.mlmodelc";
+
+ return path_bin;
+}
+#endif
+
struct whisper_state * whisper_init_state(whisper_context * ctx) {
whisper_state * state = new whisper_state;
fprintf(stderr, "%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
}
+#ifdef WHISPER_USE_COREML
+ const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model);
+
+ fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
+ fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
+
+ state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
+ if (!state->ctx_coreml) {
+ fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
+ return nullptr;
+ }
+
+ fprintf(stderr, "%s: Core ML model loaded\n", __func__);
+#endif
+
state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
state->logits_id.reserve(ctx->model.hparams.n_vocab);
}
loader.context = &fin;
+
loader.read = [](void * ctx, void * output, size_t read_size) {
std::ifstream * fin = (std::ifstream*)ctx;
fin->read((char *)output, read_size);
kv_cache_free(state->decoders[i].kv_self);
}
+#ifdef WHISPER_USE_COREML
+ whisper_coreml_free(state->ctx_coreml);
+ state->ctx_coreml = nullptr;
+#endif
+
delete state;
}
}
}
}
+static int whisper_has_coreml(void) {
+#ifdef WHISPER_USE_COREML
+ return 1;
+#else
+ return 0;
+#endif
+}
+
const char * whisper_print_system_info(void) {
static std::string s;
s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
+ s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
return s.c_str();
}